From a3c2cdee59c4a28129118606869af2b27c598b12 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Wed, 11 Oct 2023 16:08:52 +0200 Subject: [PATCH 01/76] added rex files --- tools/REX/REX.hpp | 2459 +++++++++++++++++++++++++++++++++++++++++ tools/REX/pepper.cu | 169 +++ tools/REX/teawREX.hpp | 470 ++++++++ 3 files changed, 3098 insertions(+) create mode 100644 tools/REX/REX.hpp create mode 100644 tools/REX/pepper.cu create mode 100644 tools/REX/teawREX.hpp diff --git a/tools/REX/REX.hpp b/tools/REX/REX.hpp new file mode 100644 index 0000000000..703f799d95 --- /dev/null +++ b/tools/REX/REX.hpp @@ -0,0 +1,2459 @@ +/*** + * ______ _______ __ + * | ___ \ ___\ \ / / + * | |_/ / |__ \ V / + * | /| __| / \ + * | |\ \| |___/ /^\ \ + * \_| \_\____/\/ \/ + * + ***/ + +// THIS IS NOT A LICENSED RELEASE +// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD +// FROM AN IMPROPER RELEASE. + +// Copyright © 2023 CERN, CERN Author Zenny Wettersten. +// All rights reserved. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// ZW: all fcns within the REX standard sit in the +// namespace REX +// Note that as a convention, std::string_view objects will be +// referred to as strings unless the difference is relevant +namespace REX +{ + #pragma warning( push ) + #pragma warning( disable : 4101) + static const size_t npos = -1; + #pragma warning( pop ) + // ZW: minimal fcn for counting the amount of times + // a given search term appears in a string + int nuStrCount( std::string_view searchString, std::string_view searchTerm ) + { + int count = 0; + size_t pos = 0; + while((pos = searchString.find(searchTerm, pos)) != npos ){ + ++count; + ++pos; + } + return count; + } + + // ZW: fcn for finding the location of each + // entry of seachTerm in the given string textFile + // Pre-allocates vector memory using nuStrCount + std::shared_ptr> nuFindEach( std::string_view textFile, std::string_view searchTerm 
) + { + auto eachPos = std::make_shared>(); + eachPos->reserve( nuStrCount(textFile, searchTerm) ); + eachPos->push_back( textFile.find( searchTerm ) ); + size_t currPos = textFile.find( searchTerm, eachPos->at(0) + 1 ); + while( currPos != npos ) + { + eachPos->push_back( currPos ); + currPos = textFile.find( searchTerm, currPos + 1 ); + } + return eachPos; + } + + // ZW: fcn for splitting a string into a vector of strings, + // each element differentiated by linebreaks in the original string + // Removes sequential linebreaks, ie "\n\n\n" would + // only result in a single element separation + std::shared_ptr> nuLineSplitter( std::string_view currEvt ) + { + auto lineBreaks = nuFindEach( currEvt, "\n" ); + std::vector trueBreaks; + trueBreaks.reserve( lineBreaks->size() ); + for( int k = 0 ; k < lineBreaks->size() - 1 ; ++k ) + { + if( int( (*lineBreaks)[k+1] - (*lineBreaks)[k]) == 1){continue;} + trueBreaks.push_back( (*lineBreaks)[k] ); + } + auto splitLines = std::make_shared>(); + splitLines->reserve( trueBreaks.size() ); + size_t startPos = 0; + for( auto k : trueBreaks ) + { + splitLines->push_back( currEvt.substr( startPos + 1, k - startPos - 1) ); + startPos = k; + } + if( auto strung = currEvt.substr( startPos ).size() > 1 ){ splitLines->push_back( currEvt.substr( startPos ) ); } + return splitLines; + } + + // ZW: fcn for finding each linebreak in a string, + // returning a vector of the positions of "\n" characters + // Ignores sequential linebreaks, ie would only return { } + // for the string "\n\n\n\n" + std::shared_ptr> lineFinder( std::string_view currEvt, size_t startPos = 0, size_t endPos = npos ) + { + auto lineBreaks = nuFindEach( currEvt.substr( startPos, endPos - startPos), "\n" ); + auto truBreaks = std::make_shared>(); + truBreaks->reserve( lineBreaks->size() ); + for( int k = 0 ; k < lineBreaks->size() ; ++k ) + { + if( int( (*lineBreaks)[k+1] - (*lineBreaks)[k]) == 1){continue;} + truBreaks->push_back( (*lineBreaks)[k] ); + } + return 
truBreaks; + } + + // ZW: fcn for splitting a string into a vector of strings, + // each element separated by blankspace (" ") in the original string + // Ignores sequential blankspaces, as well as linebreaks + // ie "hello \n\n\n world" would return {"hello", "world"} + // Does not ignore linebreaks that are not separated from words + // by anything other than blankspace, + // ie "hello \n\n\nworld \n\n" would return {"hello", "\n\nworld"} + std::shared_ptr> nuWordSplitter( std::string_view currEvt ) + { + std::vector noSpace; + size_t nuStart = currEvt.find_first_not_of( " " ); + size_t nuEnd = currEvt.find(" ", nuStart+1 ); + auto splitWords = std::make_shared>(); + splitWords->reserve(13); + while( nuStart != npos ) + { + std::string_view word = currEvt.substr( nuStart, nuEnd - nuStart ); + if( word == "" || word == "\n" || word == " " ){ + nuStart = currEvt.find_first_not_of(" ", nuEnd); + nuEnd = currEvt.find( " ", nuStart + 1); + continue; } + splitWords->push_back( currEvt.substr( nuStart, nuEnd - nuStart ) ); + nuStart = currEvt.find_first_not_of(" ", nuEnd); + nuEnd = currEvt.find( " ", nuStart + 1); + } + return splitWords; + } + + // ZW: fcn for splitting a string into a vector of strings, + // elements separated by any form of blankspace in the original string + // Ignores sequential blankspaces of all forms + std::shared_ptr> nuBlankSplitter( std::string_view currEvt ) + { + auto lines = nuLineSplitter( currEvt ); + auto splitString = std::make_shared>(); + splitString->reserve( lines->size() * lines->at(0).size() ); + for( auto line : *lines ) + { + auto words = nuWordSplitter(line); + for( auto word : *words ) + { + if( word == "" || word == "\n" || word == " " ){continue;} + splitString->push_back( word ); + } + } + return splitString; + } + + // ZW: templated fcn for comparing two + // string-like objects, ignoring cases + template + bool clStringComp( const Str1& org, const Str2& comp ){ + return std::equal( org.begin(), org.end(), comp.begin(), 
comp.end(), + []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + } + template + bool clStringComp( const Str1Pt& orgStrt, const Str1Pt& orgEnd, const Str2& comp ){ + return std::equal( orgStrt, orgEnd, comp.begin(), comp.end(), + []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + } + + // ZW: templated fcn for finding a caseless substring searchTerm in srcFile + // On failure to find searchTerm, returns REX::npos + template + size_t clStringFind( const Str1& srcFile, const Str2& searchTerm, size_t strtPt = 0 ){ + size_t strLen = searchTerm.size(); + if( srcFile.size() == 0 || srcFile.size() < strLen ){ return npos; } + for( size_t k = strtPt ; k < srcFile.size() - strLen; ++k ) + { + if( clStringComp( srcFile.substr(k, strLen), searchTerm ) ){ return k; } + } + return npos; + } + + // ZW: templated fcn for finding a caseless substring searchTerm of srcFile + // fulfilling a particular predicate cond( size_t, string ) + template + size_t clStringFindIf( const Str1& srcFile, const Str2& searchTerm, std::function& cond, size_t strtPt = 0 ) + { + auto currPt = clStringFind( srcFile, searchTerm, strtPt ); + bool condStat = cond( currPt, srcFile ); + while( !( condStat ) && currPt != npos) + { + currPt = clStringFind( srcFile, searchTerm, currPt + 1 ); + condStat = cond( currPt, srcFile ); + } + return currPt; + } + + // ZW: templated fcn for counting the number of occurances of + // caseless substring searchTerm in string-like object srcFile + template + int clStrCount( Str1 srcFile, Str2 searchTerm ) + { + int count = 0; + size_t pos = 0; + while((pos = clStringFind( srcFile, searchTerm, pos ) ) != npos ){ + ++count; + ++pos; + } + return count; + } + + // ZW: templated fcn for finding each instance of + // of substring searchTerm of string-like object srcFile + template + std::shared_ptr> clFindEach( Str1 srcFile, Str2 searchTerm ) + { + auto eachPos = std::make_shared>(); + auto nos = clStrCount(srcFile, 
searchTerm); + if( nos == 0 ){ return eachPos; } + eachPos->reserve( nos ); + eachPos->push_back( clStringFind( srcFile, searchTerm ) ); + size_t currPos = clStringFind( srcFile, searchTerm, eachPos->at(0) + 1); + while( currPos != npos ) + { + eachPos->push_back( currPos ); + currPos = clStringFind( srcFile, searchTerm, currPos + 1 ); + } + return eachPos; + } + + // ZW: fcn for finding left angle bracket + // indicating the start of a new node in an XML file + std::shared_ptr nodeStartFind( std::string_view parseFile, size_t strtPos ) + { + auto retPtr = std::make_shared(parseFile.find("<", strtPos)); + while( parseFile[*retPtr + 1] == '!' || parseFile[*retPtr +1] == '/' || parseFile[*retPtr +1] == '?' ){ + *retPtr = parseFile.find("<", *retPtr +1); + } + return retPtr; + } + + // ZW: fcn for finding left angle bracket + // indicating an end of a node in an XML file + std::shared_ptr nodeEndFind( std::string_view parseFile, size_t strtPos ) + { + auto retPtr = std::make_shared(parseFile.find("<", strtPos)); + while( parseFile[*retPtr + 1] != '/' ){ + *retPtr = parseFile.find("<", *retPtr +1); + } + return retPtr; + } + + // ZW: struct for handling tags in XML node opening tags + struct xmlTag { + public: + void setVal( std::string_view valSet ){ modded = true; val = valSet; } + void setId( std::string_view idSet ){ modded = true; id = idSet; } + std::string_view getVal(){ return val; } + std::string_view getId(){ return id; } + bool isModded(){ return modded; } + xmlTag(){ modded = false; return; } + xmlTag( xmlTag& oldTag ){ + modded = false; val = oldTag.getVal(); id = oldTag.getId(); + } + xmlTag( std::string_view initId, std::string_view initVal){ + modded = false; val = initVal; id = initId; + } + protected: + bool modded; + std::string_view val; + std::string_view id; + }; + + // ZW: function for parsing XML opening + // tags and returning the next header tag + std::shared_ptr xmlTagParser( std::string_view tagLine, size_t& equPt ) + { + auto tagBreaker = 
tagLine.find_first_not_of(" ", equPt+1); // ZW: need to determine what type of quotation marks are used + auto tagEnder = tagLine.find( tagLine[tagBreaker], tagBreaker+1); + auto attrEnd = tagLine.find_last_not_of(" ", equPt - 1) ; + auto attrStart = tagLine.find_last_of(" ", attrEnd) + 1; + auto tagPtr = std::make_shared(tagLine.substr(attrStart, attrEnd - attrStart + 1), tagLine.substr(tagBreaker + 1, tagEnder - tagBreaker - 1)); + equPt = tagLine.find("=", equPt + 1); // ZW: modifies input equPt to point to the next equality sign in tagLine + return tagPtr; + } + + // ZW: struct for handling nodes in generic XML files + struct xmlNode { + public: + xmlNode(){ modded = false; return; } + xmlNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ){ + modded = false; xmlFile = originFile; start = begin; children = childs; + if( xmlFile.substr(start, 1) != "<" ){ start = *nodeStartFind( xmlFile, size_t(start) ); } + size_t trueStart = xmlFile.find_first_not_of(" ", start+1); + name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ ", trueStart) - trueStart ); + if( xmlFile.find( ">", trueStart ) < xmlFile.find( "/", trueStart ) ){ + content = xmlFile.substr( xmlFile.find( ">", trueStart ) + 1, xmlFile.find( "", trueStart ) - 1 ); + } + } + std::vector> getChildren(){ return children; } + std::vector> getTags(){ return tags; } + std::string_view getFile(){ return xmlFile; } + std::string_view getName(){ return name; } + std::string_view getContent(){ return content; } + size_t getStart(){ return start; } + size_t getEnd(){ return end; } + virtual bool isModded(){ return modded; } + virtual bool isModded( bool deep ){ + bool modStat = isModded(); + if( !deep ){ return modStat; } + for( auto child : children ){ modStat = (modStat || child->isModded( deep )); } + return modStat; + } + bool isWritten(){ return written; } + bool isParsed(){ return parsed; } + void setModded( bool mod ){ modded = mod; } + bool deepModded(){ 
return deepMod; } + bool deepParse(){ return deepParsed; } + void parser( bool recursive ){ + parsed = parse( recursive ); + } + void addChild( std::shared_ptr child ){ modded = true; children.push_back(child); } + void addTag( std::shared_ptr tag ){ modded = true; tags.push_back(tag); } + void setFile( std::string_view file ){ modded = true; xmlFile = file; } + void setName( std::string_view newName ){ modded = true; name = newName; } + void setCont( std::string_view cont ){ modded = true; content = cont; } + protected: + virtual bool parse(){ + auto topStat = parseTop(); + auto contStat = parseContent(); + return ( topStat && contStat ); + } + virtual bool parse( bool recurs ) + { + bool parseSt = parse(); + if( !recurs ){ return parseSt; } + bool childSt = parseChildren( recurs ); + deepMod = true; + return (parseSt && childSt ); + } + bool parseTop(){ + if( xmlFile == "" ){ return false; } + size_t eqSgn = xmlFile.find( "=", start ); size_t nodeInitEnd = xmlFile.find( ">", start ); + while( eqSgn < nodeInitEnd ){ tags.push_back( xmlTagParser( xmlFile, eqSgn ) ); } + return true; + } + virtual bool parseContent(){ + if( xmlFile == "" ){ return false; } + auto firstR = xmlFile.find_first_of( ">/", start ); + auto nodeStrEnd = xmlFile.find(">", firstR); + if( firstR < nodeStrEnd ){ content = ""; end = nodeStrEnd + 2; parsed = true; return true; } + auto endNode = *nodeEndFind( xmlFile, start ); + auto startNode = *nodeStartFind( xmlFile, start + 1 ); + if( startNode > endNode ){end = xmlFile.find( ">", endNode ) + 1; content = xmlFile.substr( xmlFile.find( ">", start ) + 1, endNode - xmlFile.find( ">", start ) - 1 ); return true; } + auto endPt = xmlFile.find( std::string("", start) + 1, startNode - xmlFile.find(">") - 1 ); + end = xmlFile.find( ">", endPt ) + 2; + while( startNode < endNode ){ + auto nextNode = std::make_shared( xmlFile, startNode ); + children.push_back( nextNode ); + int starts = 0; + while( startNode < endNode ) + { + startNode = 
*nodeStartFind( xmlFile, startNode + 1 ); + ++starts; + } + for( int k = 0 ; k < starts ; ++k ){ endNode = *nodeEndFind( xmlFile, endNode + 1 ); } + if( endNode > end ){ break; } + } + return true; + } + bool parseChildren( bool recursive ){ + bool status = true; + if( recursive ){ + for( auto child : children ) + { + status = (status && child->parse( true )); + deepParsed = true; + } + } else { + for( auto child : children ) + { + status = (status && child->parse()); + deepParsed = true; + } + } + return status; + } + std::shared_ptr writtenSelf; + bool deepMod = false; + std::vector> children; + std::vector> tags; + std::string_view xmlFile; + std::string_view name; + std::string_view content; + size_t start; + size_t end = npos; + bool modded = false; + bool written = false; + bool parsed = false; + bool deepParsed = false; + std::string nodeHeader; + std::string nodeContent; + std::string nodeEnd; + virtual void headWriter() { + nodeHeader = "<" + std::string(name) ; + for( auto tag : tags ){ + nodeHeader += " " + std::string(tag->getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + nodeHeader += ">"; + } + virtual void endWriter() { + nodeEnd = "\n"; + } + virtual void contWriter() { + if( children.size() > 0 ){ + nodeContent = std::string(content.substr(0, children[0]->start - 1 )); + } else { + nodeContent = std::string(content); + } + } + virtual void childWriter() { + for(auto child : children){ + nodeContent += (*child->nodeWriter()); + } + } + virtual void endFinder(){ + auto headEnd = xmlFile.find(">", start); + auto slashPos = xmlFile.find("/", start); + if( headEnd > slashPos ){ end = headEnd; } + else{ end = xmlFile.find( ">", xmlFile.find( "( nodeHeader + nodeContent + nodeEnd ); + written = true; + modded = false; + } else if( !isWritten() ){ + endFinder(); + if( start > xmlFile.size() ){ start = 0; } + writtenSelf = std::make_shared( xmlFile.substr( start, end - start ) ); + written = true; + } + } + public: + virtual void childCounter( 
int& noChilds ) + { + for( auto child : children ) + { + child->childCounter( noChilds ); + if( child->end == 0 ){ --noChilds; } + } + noChilds += children.size(); + } + virtual std::shared_ptr nodeWriter() { + if( isModded( true ) || !isWritten() ){ fullWriter(); } + return writtenSelf; + } + }; + + // ZW: function for large scale parsing of XML files + // sequentially goes through the document and + // recursively calls itself while the next node + // beginning is closer than the next node ending + std::shared_ptr xmlPtrParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) + { + auto currNode = std::make_shared(parseFile, initPos); + size_t equalSign = parseFile.find("=", initPos); + size_t nodeInitEnd = parseFile.find(">", initPos); + initPos = *nodeStartFind( parseFile, initPos + 1 ); + while( equalSign < nodeInitEnd ){ + currNode->addTag( xmlTagParser(parseFile, equalSign) ); + } + while( initPos < endPos ) + { + currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); + } + + initPos = *nodeStartFind( parseFile, endPos ); + endPos = *nodeEndFind( parseFile, endPos + 1 ); + return currNode; + } + + // ZW: struct for handling rwgt parameter sets + // in the LHE header initrwgt node + struct headWeight : xmlNode { + public: + int getId(){ return id; } + std::string_view getTag(){ return idTag; } + bool hasTag(){ return (idTag.size() > 0); } + headWeight(){ name = "weight"; return; } + headWeight( std::string_view paramSet, const size_t& begin = 0 ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; return; } + headWeight( std::string_view paramSet, std::string_view idText, int idNo, const size_t& begin = 0 ) : xmlNode(){ + name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; + } + headWeight( xmlNode& node ) : xmlNode( node ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); 
+ id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } + headWeight( xmlNode* node ) : xmlNode( *node ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } + headWeight( std::shared_ptr node ) : xmlNode( *node ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } + headWeight( std::string_view paramSet, std::string& idText, unsigned int idNo, const size_t& begin = 0 ) : xmlNode(){ + name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; + } + headWeight( std::string_view paramSet, std::string& idText){ + name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; + } + void setId( std::string identity ){ modded = true; idTag = identity; } + protected: + std::string idTag; + long unsigned int id = npos; + void headWriter() override{ + if( tags.size() == 0 ){ + if( idTag == "" ){ nodeHeader = ""; return; } + if( id == npos ){ nodeHeader = ""; return; } + nodeHeader = ""; + } + nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + nodeHeader += ">"; + } + void headWriter( bool incId ){ + if( !incId ){ headWriter(); return; } + if( idTag == "" ){ headWriter(); return; } + if( id == npos ){ nodeHeader = "getId() == "id" ){ continue; } + nodeHeader += " " + std::string(tag->getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + nodeHeader += ">"; + } + void endWriter() override{ + nodeEnd = "\n"; + } + void contWriter() override{ + nodeContent = std::string( content ); + } + void childWriter() override{ + for( auto child : children){ + if( child->getName() == "weight" 
){ continue; } + nodeContent += *(child->nodeWriter()); + } + } + void childWriter( bool hasChildren ){ + if( hasChildren ){ childWriter(); } + } + void fullWriter() override{ + if( isModded() || !isWritten() ){ + headWriter(); + contWriter(); + childWriter(); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + written = true; + modded = false; + } + } + void fullWriter( bool incId, bool hasChildren=true ){ + if( isModded() || !isWritten() ){ + headWriter( incId ); + contWriter(); + childWriter( ); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + modded = false; + written = true; + } + } + }; + + // ZW: struct for handling rwgt groups + // in the LHE header initrwgt node + struct weightGroup : xmlNode { + public: + bool getIncId(){ return includeId; } + void setIncId( bool nuIncId ){ includeId = nuIncId; } + std::vector> getWgts(){ return paramSets; } + void addWgt( headWeight nuWgt ){ modded = true; paramSets.push_back( std::make_shared( nuWgt ) ); if( nuWgt.hasTag() ){ includeId = true; } } + void addWgt( std::shared_ptr nuWgt ){ modded = true; paramSets.push_back( nuWgt); if( nuWgt->hasTag() ){ includeId = true; }} + weightGroup() : xmlNode(){ name = "weightgroup"; return; } + weightGroup( std::vector> nuWgts ) : xmlNode(){ name = "weightgroup"; paramSets = nuWgts; for( auto wgt : nuWgts ){ if( wgt->hasTag() ){ includeId = true; } } } + weightGroup( std::vector nuWgts ) : xmlNode(){ + name = "weightgroup"; + for( auto wgt : nuWgts ){ + paramSets.push_back( std::make_shared( wgt ) ); + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup( xmlNode& wgtNode ) : xmlNode( wgtNode ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) 
); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + : xmlNode( originFile, begin, childs ){ + name = "weightgroup"; + if( parseTop() ){ + int checker = 0; + for( auto tag : tags ){ + if( tag->getId() == "name" ){ ++checker; rwgtName = tag->getVal(); } + if( tag->getId() == "weight_name_strategy" ){ ++checker; wgtNamStrat = tag->getVal(); + if(wgtNamStrat == "includeIdInWeightName"){ includeId = true; } } + if( checker == 2 ){ break; } + } + } + } + protected: + std::string_view rwgtName; + std::string_view wgtNamStrat; + bool includeId = false; + std::vector> paramSets; + bool nu; + std::string_view idTag; + int id; + void headWriter() override{ + nodeHeader = "nodeWriter()); + } + } + void childWriter() override{ + for(auto child : children){ + nodeContent += (*child->nodeWriter()); + } + } + void childWriter( bool hasChildren ){ + if( hasChildren ){ childWriter(); } + return; + } + void endWriter() override{ nodeEnd = "\n"; } + }; + + struct initRwgt : xmlNode { + public: + std::vector> getGroups(){ return groups; } + size_t noGrps(){ return groups.size(); } + void addGroup( weightGroup nuGroup ){ + modded = true; + auto nuGrpPtr = std::make_shared( nuGroup ); + if( grpInit( nuGrpPtr ) ){ groups.push_back( std::make_shared( nuGroup ) ); } + } + void addGroup( std::shared_ptr nuGroup ){ + modded = true; + if( grpInit( nuGroup ) ){ groups.push_back( nuGroup ); } + } + void addWgt( unsigned int index, std::shared_ptr nuWgt ){ + if( index < groups.size() ){ modded = true; groups[index]->addWgt( nuWgt ); } + else throw std::range_error( "Appending weight to uninitialised weightgroup." ); + } + void addWgt( unsigned int index, headWeight nuWgt ){ + if( index < groups.size() ){ modded = true; groups[index]->addWgt( nuWgt ); } + else throw std::range_error( "Appending weight to uninitialised weightgroup." 
); + } + initRwgt() : xmlNode(){ name = "initrwgt"; return; } + initRwgt( std::vector> nuGroups ) : xmlNode(){ + name = "initrwgt"; + for( auto group : nuGroups ){ + groups.push_back( std::make_shared( *group ) ); + } + } + initRwgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ + parser( true ); + name = "initrwgt"; + groups.reserve( children.size() ); + for( auto child : children ){ + groups.push_back( std::make_shared( *child ) ); + } + } + initRwgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + name = "initrwgt"; + groups.reserve( children.size() ); + for( auto child : children ){ + groups.push_back( std::make_shared( *child ) ); + } + } + protected: + bool grpIsInit = false; + bool grpInit( std::shared_ptr& wgt ){ + if( grpIsInit ){ return true; } + else{ + groups = std::vector>( 1, wgt ); + grpIsInit = true; + return false; + } + } + std::vector> groups; + void contWriter() override{ + nodeContent = "\n"; + for( auto group : groups ){ + nodeContent += (*group->nodeWriter()); + } + } + void childWriter() override{ + for( auto child : children ){ + if( child->getName() == "weightgroup" ){ continue; } + nodeContent += (*child->nodeWriter()); + } + } + void childWriter( bool hasChildren ){ + if( hasChildren ){ childWriter(); } + return; + } + }; + + // ZW: struct for handling event + // in event blocks of LHE files + struct bodyWgt : xmlNode { + public: + void setComment( std::string_view nuComment ){ modded = true; comment = nuComment; } + void setVal( std::string nuVal ){ modded = true; valS = nuVal; valD = std::stod(valS);} + void setVal( std::string_view nuVal ){ modded = true; valS = std::string(nuVal); valD = std::stod(valS);} + void setVal( double nuVal ){ modded = true; valD = nuVal; valS = std::to_string(valD);} + void setId( std::string nuId ){ + modded = true; id = nuId; + for( auto tag : tags ){ + if( tag->getId() == "id" ){ tag->setVal( id ); return; } + } + addTag( std::make_shared( "id", id ) ); + } + void setModded( bool nuModded 
){ modded = nuModded; } + std::string_view getComment(){ return comment; } + std::string_view getValS(){ return valS; } + double getValD(){ return valD; } + bodyWgt() : xmlNode(){ return; } + bodyWgt( std::string_view value ) : xmlNode() { setVal( value ); modded = false; } + bodyWgt( double value ) : xmlNode() { setVal( value ); modded = false; } + bodyWgt( std::string_view value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } + bodyWgt( double value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } + bodyWgt( std::string_view value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } + bodyWgt( double value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } + bodyWgt( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + : xmlNode( originFile, begin, childs ){ + auto strtPt = originFile.find_first_not_of(" >+", originFile.find(">", begin)+1); + valS = originFile.substr( strtPt, originFile.find(" ", strtPt) - strtPt ); + valD = std::stod( valS ); + } + bodyWgt( double value, std::string& idTag ){ + setVal( value ); + id = idTag; + addTag( std::make_shared("id",id) ); + } + void appendWgt( std::shared_ptr document ){ + if( !isWritten() ){ fullWriter(); } + *document += *writtenSelf; + } + void appendWgt( std::string* document ){ + if( !isWritten() ){ fullWriter(); } + *document += *writtenSelf; + } + std::shared_ptr appendWgt( std::string_view document ){ + if(!isWritten() ){ fullWriter(); } + auto retDoc = std::make_shared( document ); + *retDoc += *writtenSelf; + return retDoc; + } + protected: + std::string_view comment; + std::string valS; + std::string id; + double valD; + void fullWriter() override { + writtenSelf = std::make_shared( "getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + *writtenSelf += ">" + std::string(valS) 
+ "\n"; + modded = false; + written = true; + } + }; + + // ZW: fcn for finding the next block in SLHA format + // parameter cards + size_t blockFinder( std::string_view parseFile, size_t startPt = 0 ){ + if( parseFile.size() > 5 ){ if( clStringComp(parseFile.substr(0,5), std::string("block") )){ return size_t(0); } } + return clStringFind( parseFile, std::string("\nblock"), startPt ); + } + + // ZW: fcn for finding each decay line in SLHA format + // parameter card + std::vector decBlockStractor( std::string_view parseFile ){ + auto allDs = nuFindEach( parseFile, "\nd" ); + std::vector decLines; + decLines.reserve( allDs->size() ); + for( auto pos : *allDs ) + { + if( !(clStringComp(parseFile.substr( pos+1, 5 ), std::string("decay"))) ){ continue; } + decLines.push_back( parseFile.substr( pos + 1, parseFile.find( "\n", pos + 1 ) - pos - 1 ) ); + } + return decLines; + } + + // ZW: fcn for extracting the relevant lines of + // a block in SLHA format parameter card + // removes any comments between start of this block and next + // and also ignores lines with other information, + // eg DECAY lines + std::vector blockLineStractor( std::string_view parseFile, size_t startPt = 0){ + auto blockStrt = blockFinder( parseFile, startPt ); + auto newBlock = blockFinder( parseFile, blockStrt + 1 ); + std::vector paramLines; + paramLines.reserve( nuStrCount( parseFile, "\n" ) ); + std::shared_ptr> parLines; + if( newBlock == npos ){ parLines = nuLineSplitter( parseFile.substr( blockStrt ) ); } + else{ parLines = nuLineSplitter( parseFile.substr( blockStrt, newBlock - blockStrt ) ); } + for( auto line : *parLines ) + { + if( line.size() == 0 ){ continue; } + if( line[0] != ' ' ){ continue; } + paramLines.push_back( line ); + } + return paramLines; + } + + // ZW: struct for handling the first line of + // LHE format event block + struct evHead { + public: + std::string_view getComment(){ return comment; } + std::string_view getWeight(){ return weight; } + std::string_view 
getScale(){ return scale; } + std::string_view getAQED(){ return aqed; } + std::string_view getAQCD(){ return aqcd; } + std::string_view getNprt(){ return nprt; } + std::string_view getProcID(){ return procid; } + bool isModded(){ return modded; } + bool isWritten(){ return written; } + void setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } + void setWeight( std::string_view nuWgt ){ modded = true; weight = nuWgt; } + void setScale( std::string_view nuScale ){ modded = true; scale = nuScale; } + void setAQED( std::string_view nuAQED ){ modded = true; aqed = nuAQED; } + void setAQCD( std::string_view nuAQCD ){ modded = true; aqcd = nuAQCD; } + void setNprt( std::string_view nuNprt ){ modded = true; nprt = nuNprt; } + void setProcID( std::string_view nuProcID ){ modded = true; procid = nuProcID; } + std::shared_ptr getContent(){ + if( !isWritten() || isModded() ){ writer(); } + return content; + } + evHead(){ return; } + evHead( const std::string_view originFile, size_t beginLine = 0, size_t endLine = npos ) + { + if( originFile.size() == 0){ return; } + beginLine = originFile.find_first_not_of("\n ", beginLine); + if( endLine == npos ){ endLine = originFile.find("\n", beginLine ) + 1; } + sourceFile = originFile.substr( beginLine, endLine - beginLine ); + auto evLine = nuWordSplitter( sourceFile ); + nprt = evLine->at(0) ; + procid = evLine->at(1); + weight = evLine->at(2); + scale = evLine->at(3); + aqed = evLine->at(4); + aqcd = evLine->at(5); + } + protected: + std::shared_ptr content; + std::string_view sourceFile; + std::string_view comment; + std::string_view weight; + std::string_view scale; + std::string_view aqed; + std::string_view aqcd; + std::string_view nprt; + std::string_view procid; + bool modded = false; + bool written = false; + void writer(){ + if( isWritten() && !isModded() ){ return; } + if( !isModded() ){ content = std::make_shared( sourceFile ); return; } + auto retText = std::make_shared( " " ); + *content = " " + 
std::string( nprt ); + for( int k = 0 ; k < 8 - procid.length() ; ++k ){ *content += " "; } + *content += std::string( procid ) + " " + std::string( weight ) + " " + std::string( scale ) + " " + std::string( aqed ) + " " + std::string( aqcd ); + if( comment != "" ){ *content += " # " + std::string( comment ); } + *content += "\n"; + modded = false; + written = true; + } + }; + + // ZW: struct for handling particle lines + // in LHE format event block + struct lhePrt{ + public: + std::string_view getLine(){ return sourceFile; } + std::string_view getComment(){ return comment; } + std::vector getMom(){ return std::vector( std::begin( mom ), std::end( mom ) ); } + std::string_view getE(){ return energy; } + std::string_view getMass(){ return mass; } + std::string_view getVTim(){ return vtim; } + std::string_view getSpin(){ return spin; } + std::string_view getPDG(){ return pdg; } + std::string_view getStatus(){ return status; } + std::vector getMothers(){ return std::vector( std::begin( mothers ), std::end( mothers ) ); } + std::vector getColor(){ return std::vector( std::begin( icol ), std::end( icol ) ); } + void setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } + void setMom( std::vector nuMom ){ modded = true; mom[0] = nuMom[0]; mom[1] = nuMom[1]; mom[2] = nuMom[2]; } + void setEnergy( std::string_view nuE ){ modded = true; energy = nuE; } + void setMass( std::string_view nuM ){ modded = true; mass = nuM; } + void setVTim( std::string_view nuVTim ){ modded = true; vtim = nuVTim; } + void setSpin( std::string_view nuSpin ){ modded = true; spin = nuSpin; } + void setPDG( std::string_view nuPDG ){ modded = true; pdg = nuPDG; } + void setStatus( std::string_view nuSt ){ modded = true; status = nuSt; } + void setMothers( std::vector nuMum ){ modded = true; mothers[0] = nuMum[0]; mothers[1] = nuMum[1]; } + void setColors( std::vector nuCol ){ modded = true; icol[0] = nuCol[0]; icol[1] = nuCol[1]; } + bool isModded(){ return modded; } + bool 
isWritten(){ return written; } + std::shared_ptr getContent(){ + if( !isWritten() || isModded() ){ writer(); } + return content; + } + lhePrt(){ return; } + lhePrt( const std::string_view originFile, const size_t& beginLine = 0, const size_t& endLine = npos ) + { + sourceFile = originFile.substr( beginLine, endLine - beginLine ); + auto evLine = nuWordSplitter( sourceFile ); + pdg = evLine->at(0); + status = evLine->at(1); + mothers[0] = evLine->at(2); mothers[1] = evLine->at(3); + icol[0] = evLine->at(4); icol[1] = evLine->at(5); + for( int k = 6 ; k < 9 ; ++k){ + mom[k-6] = evLine->at(k); + } + energy = evLine->at(9); + mass = evLine->at(10); + vtim = evLine->at(11); + spin = evLine->at(12); + if( evLine->size() > 13 ){ comment = sourceFile.substr( sourceFile.find( "#" ) ); } + } + protected: + std::shared_ptr content; + std::string_view sourceFile; + std::string_view comment; + std::string_view mom[3]; + std::string_view energy; + std::string_view mass; + std::string_view vtim; + std::string_view spin; + std::string_view pdg; + std::string_view status; + std::string_view mothers[2]; + std::string_view icol[2]; + bool modded = false; + bool written = false; + void writer(){ + if( isWritten() && !isModded() ){ return; } + if( !isModded() ){ content = std::make_shared( sourceFile ); return; } + *content = ""; + for( int k = 0; k < 10 - pdg.length() ; ++k ){ *content += " "; } + *content += std::string(pdg) + " " + std::string(status); + for( auto mum : mothers ){ *content += " " + std::string( mum ); } + for( auto col : icol ){ *content += " " + std::string( col ); } + for( auto pval : mom ){ *content += " " + std::string(pval); } + *content += " " + std::string( energy ) + " " + std::string( mass ) + " " + std::string( vtim ) + " " + std::string( spin ); + if( comment != "" ){ *content += " # " + std::string( comment ); } + *content += "\n"; + modded = false; + written = true; + } + }; + + // ZW: struct for handling LHE format event block + struct event : xmlNode 
{ + public: + evHead getHead(){ return header; } + std::vector> getPrts(){ return prts; } + std::vector> getWgts(){ return rwgt; } + void setHead( evHead head ){ modded = true; header = head; } + void addPrt( std::shared_ptr prtcl ){ modded = true; prts.push_back( prtcl ); } + void addPrt( lhePrt prtcl ){ modded = true; prts.push_back( std::make_shared(prtcl) ); } + void setPrts( std::vector> prtcls ){ modded = true; prts = prtcls; } + void addWgt( bodyWgt nuWgt ){ addedWgt = true; rwgt.push_back( std::make_shared(nuWgt) ); } + void addWgt( std::shared_ptr nuWgt ){ modded = true; rwgt.push_back( nuWgt ); } + void addWgt( bodyWgt nuWgt, std::string& id ){ addedWgt = true; nuWgt.setId( id ); rwgt.push_back( std::make_shared(nuWgt) ); } + void addWgt( std::shared_ptr nuWgt, std::string& id ){ modded = true; nuWgt->setId( id ); rwgt.push_back( nuWgt ); } + bool newWeight(){ return addedWgt; } + int getNprt(){ return prts.size(); } + bool isModded() override{ return modded; } + bool isModded( bool deep ) override { + if( !deep ){ return modded; } + bool modStat = modded; + for( auto child : children ){ if(modStat){ return modStat; }; modStat = (modStat || child->isModded( deep )); } + modStat = (modStat || header.isModded()); + for( auto prt : prts ){ if(modStat){ return modStat; }; modStat = (modStat || prt->isModded()); } + for( auto wgt : rwgt ){ if(modStat){ return modStat; }; modStat = (modStat || wgt->isModded()); } + return modStat; + } + event(){ return; } + event( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + : xmlNode(originFile, begin, childs) { + xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); + if( trueStart == npos ){ return; } + auto vals = lineFinder( originFile.substr( trueStart, originFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(originFile, vals->at(0) + trueStart, vals->at(1) + trueStart + 1 ); + 
prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(originFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart + 1) ); + } + } + event( const xmlNode& originFile ) + : xmlNode( originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" ", start+1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + bool prtsAreMod(){ + for( auto prt : prts ){ if( prt->isModded() ){ return true; } } + return false; + } + bool headIsMod(){ + return header.isModded(); + } + protected: + std::vector> rwgt; + std::shared_ptr childRwgt; + bool hasRwgt(){ + if( rwgt.size() > 0 ){ return true; } + return false; + } + bool rwgtChild(){ + if( childRwgt != nullptr ){ return true; } + for( auto child : children ){ if( clStringComp(child->getName(), std::string("rwgt") ) ){ childRwgt = child; return true; } } + return false; + } + bool bothRwgt(){ return (hasRwgt() && rwgtChild() ); } + bool eitherRwgt(){ return (hasRwgt() || rwgtChild() ); } + evHead header; + std::vector> prts; + bool inRwgtChild( std::string_view name ){ + for( auto child : childRwgt->getChildren() ){ + for( auto tag : child->getTags() ){ if(clStringComp(tag->getVal(), name)){ return true; } } + } + return false; + } + bool checkRwgtOverlap(){ + for( auto wgt : rwgt ){ + for( auto tag : wgt->getTags() ){ if( inRwgtChild( tag->getVal() ) ){ return true; } } + } + return false; + } + void childRwgtWriter(){ + if( rwgtChild() ){ nodeContent += *childRwgt->nodeWriter(); } + } + void vecRwgtWriter( bool midNode = false ){ + if( !midNode ){ nodeContent += "\n"; } + for( auto wgt : 
rwgt ){ + nodeContent += *wgt->nodeWriter(); + } + nodeContent += "\n"; + } + void rwgtWriter(){ + if( bothRwgt() ){ if( checkRwgtOverlap() ){ childRwgtWriter(); return; } + childRwgtWriter(); + nodeContent.erase( nodeContent.size() - 8, 8 ); + vecRwgtWriter(); + return; + } else { + if( hasRwgt() ){ vecRwgtWriter(); return; } + if( rwgtChild() ){ childRwgtWriter(); return; } + } + } + void contWriter() override { + nodeContent = "\n" + *header.getContent(); + for( auto prt : prts ){ + nodeContent += *prt->getContent(); + } + } + void childWriter() override { + for( auto child : children ){ + if( clStringComp( child->getName(), std::string("wgt") ) ){ continue; } + nodeContent += *child->nodeWriter(); + } + } + bool addedWgt = false; + void fullWriter() override { + if( isModded( false ) ){ + headWriter(); + contWriter(); + childWriter(); + rwgtWriter(); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + modded = false; + } else if( !isWritten() ){ + writtenSelf = std::make_shared( xmlFile.substr( start, end - start ) ); + written = true; + } + } + void fullWriter( bool deep ){ + if( !deep ){ fullWriter(); return; } + if( isModded( true ) ){ + headWriter(); + contWriter(); + childWriter(); + rwgtWriter(); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + modded = false; + written = true; + } else if( !isWritten() ){ + writtenSelf = std::make_shared( xmlFile.substr( start, end - start ) ); + written = true; + } + } + void appendWgts(){ + if( !addedWgt ){ return; } + writtenSelf->erase( writtenSelf->size() - 17, 17 ); + for( auto wgt : rwgt ){ + if( !wgt->isWritten() ){ wgt->appendWgt( writtenSelf ); } + } + *writtenSelf += "\n\n"; + } + public: + std::shared_ptr nodeWriter() override { + if( isModded(false) || !isWritten() ){ fullWriter(); return writtenSelf; } + if( addedWgt ){ appendWgts(); } + return writtenSelf; + } + std::shared_ptr nodeWriter( bool recursive ){ + if( isModded( 
recursive ) || !isWritten() ){ fullWriter(); return writtenSelf; } + if( addedWgt ){ appendWgts(); } + return writtenSelf; + } + }; + + // ZW: struct for handling the first line of + // LHE format init tag + struct lheInitHead{ + public: + std::string_view idbmup[2]; + std::string_view ebmup[2]; + std::string_view pdfgup[2]; + std::string_view pdfsup[2]; + std::string_view idwtup; + std::string_view nprup; + bool isWritten(){ return written; } + bool isModded(){ return modded; } + std::shared_ptr getContent(){ + if( isModded() || !isWritten() ){ writer(); } + return content; } + lheInitHead( std::string_view initHead ){ + auto vals = *nuBlankSplitter( initHead ); + if( vals.size() < 10 ){ return; } + idbmup[0] = vals[0]; idbmup[1] = vals[1]; + ebmup[0] = vals[2]; ebmup[1] = vals[3]; + pdfgup[0] = vals[4]; pdfgup[1] = vals[5]; + pdfsup[0] = vals[6]; pdfsup[1] = vals[7]; + idwtup = vals[8]; nprup = vals[9]; + } + lheInitHead( xmlNode& initNode ) + { + if( initNode.getName() != "init" ){ return; } + auto startPos = initNode.getFile().find( ">", initNode.getStart() ) + 1; + auto endPos = initNode.getFile().find( "\n", startPos ); + auto vals = *nuBlankSplitter( initNode.getFile().substr( startPos, endPos - startPos ) ); + idbmup[0] = vals[0]; idbmup[1] = vals[1]; + ebmup[0] = vals[2]; ebmup[1] = vals[3]; + pdfgup[0] = vals[4]; pdfgup[1] = vals[5]; + pdfsup[0] = vals[6]; pdfsup[1] = vals[7]; + idwtup = vals[8]; nprup = vals[9]; + } + protected: + std::shared_ptr content; + bool written = false; + bool modded = false; + void writer(){ + *content = std::string(idbmup[0]) + " " + std::string(idbmup[1]) + " " + std::string(ebmup[0]) + " " + std::string(ebmup[1]) + " " + std::string(pdfgup[0]) + + " " + std::string(pdfgup[1]) + " " + std::string(pdfsup[0]) + " " + std::string(pdfsup[1]) + " " + std::string(idwtup) + " " + std::string(nprup) +"\n"; + written = true; + modded = false; + } + }; + + // ZW: struct for handling process lines + // in LHE format init tag + struct 
lheInitLine { + public: + std::string_view xsecup; + std::string_view xerrup; + std::string_view xmaxup; + std::string_view lprup; + bool isWritten(){ return written; } + bool isModded(){ return modded; } + std::shared_ptr getContent(){ + if( isModded() || !isWritten() ){ writer(); } + return content; } + lheInitLine(){} + lheInitLine( std::string_view procLine ) + { + auto vals = *nuBlankSplitter( procLine ); + if( vals.size() < 4 ){ return; } + xsecup = vals[0]; + xerrup = vals[1]; + xmaxup = vals[2]; + lprup = vals[3]; + } + protected: + std::shared_ptr content; + bool written = false; + bool modded = false; + void writer(){ + *content = std::string(xsecup) + " " + std::string(xerrup) + " " + std::string(xmaxup) + " " + std::string(lprup) + "\n"; + written = true; + modded = false; + } + }; + + // ZW: struct for handling single parameter line in + // SLHA format parameter card + struct paramVal{ + public: + double value = 0; + int id = 0; + std::string_view realLine; + std::string_view comment; + std::string_view idStr; + std::string_view valStr; + virtual void parse(){ + id = std::stoi( std::string(idStr) ); + value = std::stod( std::string(valStr) ); + } + paramVal(){ realLine = ""; idStr = ""; valStr = ""; } + paramVal( std::string_view paramLine, bool parseOnline = false ) + { + if( paramLine.find("\n") != npos ){ + auto startPos = paramLine.find_first_not_of(" \n", paramLine.find("\n")); + if( startPos!= npos ){ + auto endPos = paramLine.find("\n", startPos); + realLine = paramLine.substr(startPos, endPos - startPos - 1); + } else{ + realLine = paramLine.substr( 0, paramLine.find("\n") - 1 ); + } + } + realLine = paramLine; + auto vals = *nuBlankSplitter( realLine ); + idStr = vals[0]; + valStr = vals[1]; + if( parseOnline ){ + if( vals.size() > 2 ) + { + auto comStart = realLine.find("#"); + comStart = realLine.find_first_not_of( " #", comStart ); + comment = realLine.substr( comStart, realLine.find("\n", comStart) - comStart ); + } + parse(); } + } + bool 
isMod(){ return modded; } + bool modded = false; + virtual std::shared_ptr selfWrite(){ + auto writeVal = std::make_shared(""); + if( isMod() ) + { + for( int k = idStr.size() ; k < 5 ; ++k ){ *writeVal += " "; } + *writeVal += std::string( idStr ) + " " + std::string( valStr ); + if( comment.size() != 0 ){ + *writeVal += " # " + std::string( comment ); + } + *writeVal += "\n"; + } + else{ *writeVal = std::string( realLine ) + "\n"; } + return writeVal; + } + }; + + // ZW: struct for handling single DECAY line + // in SLHA format parameter card + struct decVal : paramVal{ + public: + void parse() override { + auto vals = *nuBlankSplitter( realLine ); + id = std::stoi( std::string(vals[1]) ); + value = std::stod( std::string(vals[2]) ); + if( vals.size() > 3 ) + { + auto comStart = realLine.find("#"); + comment = realLine.substr( comStart, realLine.find("\n", comStart) - comStart ); + } + } + decVal( std::string_view paramLine = "", bool parseOnline = false ) : paramVal( paramLine, false ) + { + if( parseOnline ){ parse(); } + } + std::shared_ptr selfWrite() override { + auto writeVal = std::make_shared(""); + if( isMod() ) + { + *writeVal += "DECAY " + std::string( idStr ) + " " + std::string( valStr ); + if( comment.size() != 0 ){ + *writeVal += " # " + std::string( comment ); + } + *writeVal += "\n"; + } + else{ *writeVal = std::string( realLine ) + "\n"; } + return writeVal; + } + }; + + // ZW: struct for handling parameter block + // in SLHA format parameter card + struct paramBlock { + public: + std::string_view realBlock; + size_t startPt; + std::string_view comment; + std::string_view initComm; + std::string_view name; + std::vector params; + virtual void parse( bool parseOnline = false ){ + if( realBlock.size() == 0 ){ return; } + if( !(clStringComp(realBlock.substr(startPt+1, 5), std::string("block"))) ){ startPt = clStringFind( realBlock, std::string("\nblock") ); } + auto namePt = realBlock.find_first_not_of( " ", startPt + 7 ); + name = 
realBlock.substr( namePt, realBlock.find_first_of( " \n", namePt ) - namePt ); + if( realBlock.find( " ", namePt ) < realBlock.find( "\n", namePt ) ) + {comment = realBlock.substr( namePt + name.size(), realBlock.find( "\n", namePt ) - namePt - name.size() ); } + auto paramLines = blockLineStractor( realBlock.substr( startPt ) ); + params.reserve( paramLines.size() ); + for( auto line : paramLines ) + { + params.push_back( paramVal( line, parseOnline ) ); + } + } + paramBlock(){ return; } + paramBlock( std::string_view paramSet, bool parseOnline = false ) + { + realBlock = paramSet; + startPt = clStringFind( realBlock, std::string("\nB") ); + if( parseOnline ){ parse(parseOnline); } + } + bool isMod(){ return modded; } + bool modded = false; + virtual std::shared_ptr selfWrite(){ + auto writeBlock = std::make_shared(""); + if( isMod() ) + { + *writeBlock += "\nBLOCK " + std::string(name); + if( comment.size() > 0 ){ + *writeBlock += " # " + std::string( comment ); + } + *writeBlock += "\n"; + for ( auto val : params ) + { + *writeBlock += *val.selfWrite(); + } + } + else{ if( startPt == npos ){ + *writeBlock += realBlock; + } else { + *writeBlock = realBlock.substr( startPt ); + } } + return writeBlock; + } + }; + + // ZW: struct for handling DECAY lines + // in SLHA format parameter card + struct decBlock : paramBlock { + public: + std::vector decays; + void parse( bool parseOnline = false ) override{ + if( realBlock.size() == 0 ){ return; } + auto decLines = clFindEach( realBlock, std::string("\ndecay") ); + decays.reserve(decLines->size()); + if( realBlock.size() > 5 ){ if( clStringComp( realBlock.substr(0,5), std::string("decay")) ) + { decays.push_back( decVal(realBlock.substr( 0, realBlock.find("\n") ), parseOnline) ); } } + for( auto pts : *decLines ) + { + auto lineBr = realBlock.find( "\n", pts + 1 ); + if( lineBr == npos ){ decays.push_back( decVal( realBlock.substr( pts + 1), parseOnline ) ); continue; } + decays.push_back( decVal( realBlock.substr( pts 
+ 1, lineBr - pts - 1 ), parseOnline ) ); + } + } + void parse( std::shared_ptr> decLines, bool parseOnline = false ) { + decays.reserve(decLines->size()); + if( realBlock.size() > 5 ){ if( clStringComp( realBlock.substr(0,5), std::string("decay")) ) + { decays.push_back( decVal(realBlock.substr( 0, realBlock.find("\n") ), parseOnline) ); } } + for( auto pts : *decLines ) + { + auto lineBr = realBlock.find( "\n", pts + 1 ); + if( lineBr == npos ){ decays.push_back( decVal( realBlock.substr( pts + 1), parseOnline ) ); continue; } + decays.push_back( decVal( realBlock.substr( pts + 1, lineBr - pts - 1 ), parseOnline ) ); + } + } + decBlock( std::string_view paramSet = "", bool parseOnline = false ) : paramBlock( paramSet, parseOnline ) + { + realBlock = paramSet; + if( parseOnline ){ parse(parseOnline); } + } + std::shared_ptr selfWrite() override { + auto writeBlock = std::make_shared(""); + *writeBlock += "\n"; + for ( auto val : decays ) + { + *writeBlock += *val.selfWrite(); + } + return writeBlock; + } + }; + + // ZW: struct for handling SLHA parameter cards + struct lesHouchesCard { + public: + decBlock decays; + std::string_view xmlFile; + size_t start; + size_t end; + bool modded; + bool parsed; + std::string_view header; + std::vector blocks; + size_t blockStart; + std::function lambda = [&]( size_t& conPt, const std::string_view& file ) + { return !( file[conPt+1] == ' ' || file[conPt+1] == '#' || file[conPt+1] == '\n' ); }; + std::function lambdaNu = [&]( size_t& conPt, const std::string_view& file ) + { return !( file[conPt+1] == ' ' || file[conPt+1] == '\n' || file[conPt+1] == '<'); }; + std::function lambdaD = [&]( size_t& conPt, const std::string_view& file ) + { return !( clStringComp(file.substr(conPt+1, 1), std::string("d") ) ); }; + void parse( bool parseOnline = false ) + { + if( parsed ){ return; } + if( xmlFile.substr(start,1).find_first_of("BbDd#") == npos ){ start = clStringFindIf( xmlFile, std::string("\n"), lambdaNu ); } + auto blockPts = 
clFindEach( xmlFile, std::string("\nblock") ); + auto decLines = clFindEach( xmlFile, std::string("\ndecay") ); + header = xmlFile.substr( start, std::min( blockPts->at(0), decLines->at(0) ) - start ); + for( int k = 0 ; k < blockPts->size() - 1 ; ++k ) + { + blocks.push_back( paramBlock( xmlFile.substr( blockPts->at(k), blockPts->at(k+1) - blockPts->at(k) ), parseOnline ) ); + } + blocks.push_back(paramBlock(xmlFile.substr(blockPts->at(blockPts->size()-1), clStringFindIf( xmlFile, std::string("\n"), + lambda, blockPts->at(blockPts->size()-1) + 1) - blockPts->at(blockPts->size()-1)), parseOnline)); + decays = decBlock( xmlFile ); + decays.parse( decLines, parseOnline ); + parsed = true; + } + lesHouchesCard( const std::string_view originFile = "", const size_t& begin = 0, bool parseOnline = false ){ + xmlFile = originFile; start = begin; size_t trueStart = originFile.find_first_not_of("\n ", begin+1); + modded = false; blockStart = clStringFindIf( xmlFile, std::string("\n"), lambda, start + 1); end = xmlFile.find(" selfWrite(){ + auto writeCard = std::make_shared(header); + if( isMod() ) + { for( auto block : blocks ) + { *writeCard += *block.selfWrite(); } + *writeCard += *decays.selfWrite(); } + else{ + if( end != npos ){ *writeCard += std::string( xmlFile.substr( blockStart, end - blockStart ) ); + } else{ *writeCard += std::string( xmlFile.substr( blockStart ) ); } + } + return writeCard; + } + }; + + struct slhaNode : xmlNode { + public: + std::shared_ptr getParameters(){ + modded = true; + return parameterCard; + } + slhaNode() : xmlNode(){} + slhaNode( lesHouchesCard parameters ) : xmlNode(){ + parameterCard = std::make_shared( parameters ); + pCardInit = true; + } + slhaNode( std::shared_ptr parameters ) : xmlNode(){ + parameterCard = parameters; + pCardInit = true; + } + slhaNode( xmlNode& node, bool parseOnline = false ) : xmlNode( node ){ + parameterCard = std::make_shared( node.getFile(), node.getStart(), parseOnline ); + } + slhaNode( xmlNode* node, 
bool parseOnline = false ) : xmlNode( *node ){ + parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); + } + slhaNode( std::shared_ptr node, bool parseOnline = false ) : xmlNode( *node ){ + parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); + } + slhaNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ) + : xmlNode( originFile, begin ){ + if( parse() ){ parameterCard = std::make_shared( content, begin, parseOnline ); pCardInit = true; } + } + protected: + std::shared_ptr parameterCard; + bool pCardInit = false; + void headWriter() override{ + nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + nodeHeader += ">"; + } + void endWriter() override{ nodeEnd += "\n"; } + void contWriter() override{ + if( pCardInit ){ + nodeContent = *parameterCard->selfWrite(); + } else { + nodeContent = content; + } + } + }; + + // ZW: struct for handling LHE init nodes + struct initNode : xmlNode { + public: + std::shared_ptr getHead(){ return initHead; } + std::vector> getLines(){ return initLines; } + void setHead( std::shared_ptr head ){ modded = true; initHead = head; } + void setLines( std::vector> lines ){ modded = true; initLines = lines; initHead->nprup = std::to_string( initLines.size() ); } + void addLine( std::shared_ptr line ){ modded = true; initLines.push_back( line ); initHead->nprup = std::to_string( initLines.size() ); } + initNode() : xmlNode(){ name = "init"; } + initNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ) + : xmlNode( originFile, begin ){ + auto strtPt = originFile.find_first_not_of(" \n", originFile.find(">", start+1)); + content = originFile.substr( strtPt, originFile.find(" initHead; + std::vector> initLines; + bool parseContent() override{ + if( content.size() == 0 ){ return false; } + auto linebreaks = lineFinder( content ); + if( linebreaks->size() == 0 ){ return false; } + initHead = 
std::make_shared(content.substr( 0, linebreaks->at(0) ) ); + for( int k = 0 ; k < linebreaks->size() - 1 ; ++k ){ + initLines.push_back( std::make_shared( content.substr( linebreaks->at(k), linebreaks->at(k+1) - linebreaks->at(k) ) ) ); + } + return true; + } + void contWriter() override{ + if( isModded() ){nodeContent = std::string( content ); return; } + nodeContent = *initHead->getContent(); + for( auto line : initLines ){ + nodeContent += *line->getContent(); + } + } + }; + + // ZW: struct for explicitly handling LHE header nodes + struct lheHead : xmlNode { + public: + size_t addWgtGroup( std::shared_ptr& wgtGroup ){ + hasRwgt = true; + modded = true; + if( wgtGrpInit( wgtGroup ) ){ + rwgtNodes->addGroup( wgtGroup ); + } + return (rwgtNodes->noGrps() - 1); + } + size_t addWgtGroup( weightGroup wgtGroup ){ + hasRwgt = true; + modded = true; + auto wgtGrpPtr = std::make_shared( wgtGroup ); + if( wgtGrpInit( wgtGrpPtr ) ){ + rwgtNodes->addGroup( std::make_shared( wgtGroup ) ); + } + return (rwgtNodes->noGrps() - 1); + } + void addWgt( unsigned int index, std::shared_ptr nuWgt ){ + if( index >= (int)rwgtNodes->getGroups().size() ) + throw std::range_error( "Appending weight to uninitialised weightgroup." ); + hasRwgt = true; + modded = true; + rwgtNodes->addWgt( index, nuWgt ); + } + void addWgt( unsigned int index, headWeight nuWgt ){ + if( index >= (int)rwgtNodes->getGroups().size() ) + throw std::range_error( "Appending weight to uninitialised weightgroup." ); + hasRwgt = true; + modded = true; + rwgtNodes->addWgt( index, nuWgt ); + } + void addWgt( unsigned int index, std::shared_ptr nuWgt, std::string idTagg ){ + if( index >= (int)rwgtNodes->getGroups().size() ) + throw std::range_error( "Appending weight to uninitialised weightgroup." 
); + hasRwgt = true; + modded = true; + nuWgt->setId( idTagg ); + rwgtNodes->addWgt( index, nuWgt ); + } + void addWgt( unsigned int index, headWeight nuWgt, std::string idTagg ){ + if( index >= (int)rwgtNodes->getGroups().size() ) + throw std::range_error( "Appending weight to uninitialised weightgroup." ); + hasRwgt = true; + modded = true; + nuWgt.setId( idTagg ); + rwgtNodes->addWgt( index, nuWgt ); + } + void setInitRwgt( initRwgt initWgt ){ hasRwgt = true; modded = true; rwgtNodes = std::make_shared(initWgt); } + void setInitRwgt( std::shared_ptr initWgt ){ hasRwgt = true; modded = true; rwgtNodes = initWgt; } + std::vector> getWgtGroups(){ return rwgtNodes->getGroups(); } + std::shared_ptr getInitRwgt(){ return rwgtNodes; } + std::shared_ptr getParameters(){ return parameters; } + void setParameters( std::shared_ptr params ){ parameters = params; } + bool rwgtInc(){ return hasRwgt; } + lheHead(){ return; } + lheHead( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + : xmlNode(originFile, begin, childs){ + xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); + if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} + } + protected: + bool wgtGrpIsInit = false; + bool wgtGrpInit( std::shared_ptr& wgtGrp ){ + if( wgtGrpIsInit ){ return true; } + if( rwgtNodes == nullptr ){ + rwgtNodes = std::make_shared(); + wgtGrpIsInit = true; + rwgtNodes->addGroup( wgtGrp ); + return false; + } else throw std::runtime_error( "Error while initiating return LHE file header (initrwgt node is defined in an unrecognised manner)." 
); + } + std::shared_ptr parameters; + bool hasRwgt = false; + std::shared_ptr rwgtNodes; + std::vector> initrwgt; + bool relChildSet = false; + std::vector relChild; + void setRelChild(){ + if( relChildSet ){ return; } + relChild.reserve( children.size() ); + for( int k = 0 ; k < children.size() ; ++k ){ + auto child = &children[k]; + if( (*child)->getName() == "slha" ){ continue; } + if( (*child)->getName() == "initrwgt" ){ continue; } + relChild.push_back( k ); + } + relChildSet = true; + } + bool parseChildren( bool recursive ){ + bool status = true; + for( auto child : children ){ + if( child->getName() == "slha" || child->getName() == "initrwgt" ){ continue; } + child->parser( recursive ); + status = (status && child->isParsed() ); + deepParsed = true; + } + return status; + } + void headWriter() override{ + nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + nodeHeader += ">\n"; + } + void childWriter() override{ + setRelChild(); + for( auto relKid : relChild ){ + nodeContent += *(children[relKid]->nodeWriter()); + } + if( parameters != nullptr ){ nodeContent += *parameters->nodeWriter(); } + if( hasRwgt ){ + nodeContent += *rwgtNodes->nodeWriter(); + } + } + void fullWriter() override{ + if( isModded() ){ + headWriter(); + contWriter(); + childWriter(); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + written = true; + } + } + }; + + // ZW: struct for keeping track of appended weights in LHE node, + // since weight information is stored both in the header + // and in the individual events + struct newWgt{ + protected: + std::shared_ptr headWgt; + std::vector> bodyWgts; + public: + newWgt( std::shared_ptr heaWgt, std::vector> bodWgts ){ + headWgt = heaWgt; bodyWgts = bodWgts; + } + newWgt( std::shared_ptr heaWgt, std::shared_ptr> wgts ){ + headWgt = heaWgt; + bodyWgts = std::vector>(wgts->size()); + auto idTag = std::string(headWgt->getTag()); + if( idTag != "" ){ + for( size_t i = 0 ; i < 
wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i), idTag); + } + } else{ + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i)); + } + } + } + newWgt( std::string_view parameters, std::shared_ptr> wgts, std::string idTag = "rex_rwgt" ){ + headWgt = std::make_shared(parameters, idTag); + bodyWgts = std::vector>(wgts->size()); + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i), idTag); + } + } + newWgt( std::string_view parameters, int idNum, std::shared_ptr> wgts, std::string idTag = "rex_rwgt" ){ + std::string newTag = std::string( idTag ) + "_" + std::to_string( idNum ); + headWgt = std::make_shared(parameters, newTag); + bodyWgts = std::vector>(wgts->size()); + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i), newTag); + } + } + newWgt( std::string& parameters ){ + headWgt = std::make_shared(parameters); + } + newWgt( std::string& parameters, std::string& idTag ){ + headWgt = std::make_shared(parameters, idTag); + } + std::shared_ptr getHeadWgt(){ return headWgt; } + std::vector> getBodyWgts(){ return bodyWgts; } + void addBdyWgts( std::shared_ptr> wgts ){ + auto idTag = std::string(headWgt->getTag()); + if( idTag != "" ){ + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i), idTag); + } + } else{ + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i)); + } + } + } + }; + + // ZW: general struct for handling LHE files explicitly + struct lheNode : xmlNode { + public: + std::vector> events = {}; + std::shared_ptr header = std::make_shared(xmlFile, start); + std::shared_ptr init = std::make_shared(xmlFile, start); + lheNode() : xmlNode(){} + lheNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + : xmlNode(originFile, begin, childs){ + xmlFile = originFile; start = begin; children = childs; size_t 
trueStart = originFile.find_first_not_of(" ", begin+1); + if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} + } + bool isModded() override{ return modded; } + bool isModded( bool deep ) override{ + if( !deep ){ return isModded(); } + bool modStat = isModded(); + for( auto child : children ){ modStat = ( modStat || child->isModded( deep ) ); } + for( auto event : events ){ modStat = ( modStat || event->isModded( deep ) ); } + return modStat; + } + void addWgt( size_t index, newWgt& addedWgt ){ + header->addWgt( index, addedWgt.getHeadWgt() ); + auto wgtsVec = addedWgt.getBodyWgts(); + for( int k = 0 ; k < wgtsVec.size() ; ++k ){ + events[k]->addWgt( wgtsVec[k] ); + } + } + void addWgt( size_t index, newWgt& addedWgt, std::string& idTag ){ + header->addWgt( index, addedWgt.getHeadWgt(), idTag ); + auto wgtsVec = addedWgt.getBodyWgts(); + for( int k = 0 ; k < wgtsVec.size() ; ++k ){ + events[k]->addWgt( wgtsVec[k] ); + } + } + protected: + virtual void headerWriter(){ + nodeContent += "\n" + *header->nodeWriter(); + } + virtual void initWriter(){ + nodeContent += *init->nodeWriter(); + } + virtual void eventWriter(){ + for( auto event : events ){ + nodeContent += *event->nodeWriter(); + } + } + void contWriter() override{ + nodeContent = ""; + headerWriter(); + initWriter(); + eventWriter(); + } + void fullWriter() override{ + if( isModded( true ) ){ + headWriter(); + contWriter(); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + written = true; + modded = false; + } else if( !isWritten() ){ + writtenSelf = std::make_shared( xmlFile.substr(start, end - start ) ); + written = true; + } + } + public: + virtual std::shared_ptr nodeWriter() { + if( isModded( true ) || !isWritten() ){ fullWriter(); } + return writtenSelf; + } + }; + + // ZW: function for extracting event information from + // LHE files + std::vector>> valExtraction( const lheNode& lheFile ) + { + 
bool getGs = true; + auto momVec = std::make_shared>(); + auto wgtVec = std::make_shared>(); + auto gVec = std::make_shared>(); + momVec->reserve( lheFile.events.size() * 4 * std::stoi(std::string(lheFile.events[0]->getHead().getNprt())) ); + wgtVec->reserve( lheFile.events.size() ); + gVec->reserve( lheFile.events.size() ); + if( getGs ){ + for( auto event : lheFile.events ) + { + wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); + gVec->push_back( std::sqrt( 4.0 * M_PI * std::stod(std::string( event->getHead().getAQCD() )))); + for( auto prt : event->getPrts() ) + { + momVec->push_back(std::stod(std::string(prt->getE()))); + for( int p = 0 ; p < 3 ; ++p ) + { momVec->push_back(std::stod(std::string(prt->getMom()[p]))); } + } + } + } else{ + for( auto event : lheFile.events ) + { + wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); + gVec->push_back( std::stod(std::string( event->getHead().getAQCD() ))); + for( auto prt : event->getPrts() ) + { + momVec->push_back(std::stod(std::string(prt->getE()))); + for( int p = 0 ; p < 3 ; ++p ) + { momVec->push_back(std::stod(std::string(prt->getMom()[p]))); } + } + + } } + return {momVec, gVec, wgtVec}; + } + + // ZW: fcn for parsing an LHE format event block + // and return a REX format event object + std::shared_ptr evPtrParsor( std::string_view parseFile, size_t& initPos, size_t& endPos ) + { + auto currNode = std::make_shared(parseFile, initPos); + initPos = *nodeStartFind( parseFile, initPos + 1 ); + while( initPos < endPos ) + { + currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); + } + size_t equalSign = parseFile.find_first_of("=>", initPos); + size_t nodeInitEnd = parseFile.find(">", initPos); + while( equalSign < nodeInitEnd ){ + currNode->addTag( xmlTagParser(parseFile, equalSign) ); + } + initPos = *nodeStartFind( parseFile, endPos ); + endPos = *nodeEndFind( parseFile, endPos + 1 ); + return currNode; + } + + // ZW: fcn for parsing an LHE format 
header + // and return a REX format lheHead object + std::shared_ptr lheHeadParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) + { + auto currNode = std::make_shared(parseFile, initPos); + initPos = *nodeStartFind( parseFile, initPos + 1 ); + while( initPos < endPos ) + { + auto nuStrtPos = *nodeStartFind( parseFile, initPos); + currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); + if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "init" ){ continue; } + if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "slha" ){ + auto nuLine = parseFile.find("\n", parseFile.find("<", initPos)); + currNode->setParameters( std::make_shared(currNode->getChildren()[ currNode->getChildren().size() - 1 ]) ); + } + if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "initrwgt" ){ + currNode->setInitRwgt( std::make_shared( currNode->getChildren()[ currNode->getChildren().size() - 1 ] ) ); + } + } + size_t equalSign = parseFile.find("=", initPos); + size_t nodeInitEnd = parseFile.find(">", initPos); + while( equalSign < nodeInitEnd ){ + currNode->addTag( xmlTagParser(parseFile, equalSign) ); + } + initPos = *nodeStartFind( parseFile, endPos ); + endPos = *nodeEndFind( parseFile, endPos + 1 ); + return currNode; + } + + // ZW: fcn for parsing an LHE format file + // and return a REX format LHE node object + std::shared_ptr lheParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) + { + auto currNode = std::make_shared(parseFile, initPos); + initPos = *nodeStartFind( parseFile, initPos + 1 ); + while( initPos < endPos ) + { + auto nuStrtPos = *nodeStartFind( parseFile, initPos); + //if( nuStrtPos == parseFile.find("events.push_back( evPtrParsor( parseFile, initPos, endPos ) ); + // continue; + //} else if( nuStrtPos == parseFile.find("header = lheHeadParser( parseFile, initPos, endPos ); + // continue; + //} else if( nuStrtPos == parseFile.find("init = 
std::make_shared( parseFile, initPos ); + // initPos = *nodeStartFind( parseFile, endPos ); + // endPos = *nodeEndFind( parseFile, *nodeEndFind( parseFile, endPos + 1 ) + 1); + // continue; + //} + if( parseFile.substr( initPos, 6 ) == "events.push_back( evPtrParsor( parseFile, initPos, endPos ) ); + continue; + } else if( parseFile.substr( initPos, 7 ) == "header = lheHeadParser( parseFile, initPos, endPos ); + continue; + } else if( parseFile.substr( initPos, 5 ) == "init = std::make_shared( parseFile, initPos ); + initPos = *nodeStartFind( parseFile, endPos ); + endPos = *nodeEndFind( parseFile, *nodeEndFind( parseFile, endPos + 1 ) + 1); + continue; + } else { + currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); + } + } + size_t equalSign = parseFile.find("=", initPos); + size_t nodeInitEnd = parseFile.find(">", initPos); + while( equalSign < nodeInitEnd ){ + currNode->addTag( xmlTagParser(parseFile, equalSign) ); + } + initPos = *nodeStartFind( parseFile, endPos ); + endPos = *nodeEndFind( parseFile, endPos + 1 ); + return currNode; + } + + // ZW: struct for treating individual HEP + // processes, formatted based on PDG codes + // and the LHE particle status standard + struct lheProc { + public: + std::vector minusOne; + std::vector plusOne; + std::vector minusTwo; + std::vector plusTwo; + std::vector plusThree; + std::vector minusNine; + std::map> valVecs{{"-1", minusOne}, {"1", plusOne}, {"-2", minusTwo}, {"2", plusTwo}, {"3", plusThree}, {"-9", minusNine}}; + lheProc( event& eventNode ) + { + for( auto prt : eventNode.getPrts() ) + { + valVecs[prt->getStatus()].push_back(prt->getPDG()); + } + } + }; + + // ZW: fcn for uploading text files + // to the program, pushing all characters to lowercase + std::shared_ptr filePuller( const std::string& fileLoc ) + { + std::ifstream fileLoad( fileLoc ); + std::stringstream buffer; + buffer << fileLoad.rdbuf(); + auto fileContent = std::make_shared(buffer.str()); + //std::transform( fileContent->begin(), 
fileContent->end(), fileContent->begin(), ::tolower ); + buffer.str(std::string()); + fileLoad.close(); + return fileContent; + } + + // ZW: fcn for saving std::string to disk + bool filePusher( std::string fileLoc, std::string fileCont ) + { + std::ofstream fileWrite( fileLoc ); + if(!fileWrite){return false;} + fileWrite << fileCont; + fileWrite.close(); + return true; + } + + // ZW: fcn for extracting the fill + // process information from an LHE event + std::shared_ptr>> pgdXtract( event& currEv, const std::vector& pdgVec ) + { + auto currProc = std::make_shared>>(); + auto &useProc = *currProc; + for( auto prt : currEv.getPrts() ) + { + useProc[ prt->getStatus() ].push_back(prt->getPDG()); + } + return currProc; + } + + // ZW: fcn for comparing two processes it the + // format output by pgdXtract + bool sameProcString( std::map>& firstVec, std::map>& secVec, const std::vector& pdgVec ) + { + if( firstVec.size() != secVec.size() ){return false;} + for(auto code : pdgVec ) + { + if( firstVec[code] != secVec[code] ){ return false; } + } + return true; + } + + // ZW: fcn for processes in the lheProc struct format + bool procComp( const lheProc& firstProc, const lheProc& secProc, const std::vector& pdgVec ) + { + for( auto stat : pdgVec ) + { + if( firstProc.valVecs.at(stat).size() != secProc.valVecs.at(stat).size() ){ return false; } + if( firstProc.valVecs.at(stat) != secProc.valVecs.at(stat) ){ return false; } + } + return true; + } + + // ZW: fcn for checking whether a list of pdgKtract format + // processes sourceProcList contains a given process newProc + bool procVecContains( std::vector>>>& sourceProcList, + std::map>& newProc, const std::vector& pdgVec ) + { + int noProcs = sourceProcList.size(); + for( auto proc : sourceProcList ) + { + if( sameProcString( *proc, newProc, pdgVec ) ){ return true; } + } + return false; + } + + // ZW: fcn for checking whether a vector of lheProc structs + // procList contains a given lheProc nuProc + bool procListComp( 
const std::vector>& procList, const lheProc& nuProc, const std::vector& pdgVec ) + { + if( procList.size() != 0 ){ + for(auto proc : procList ) + { + if( procComp( *proc, nuProc, pdgVec ) ){ return true; } + } + } + return false; + } + + // ZW: fcn for extracting the different processes + // in a given REX format LHE file in the pdgXtract format + std::vector>>> procExtractor( const lheNode& lheFile ) + { + std::vector>>> procList; + const static std::vector pdgVec = { "-1", "1", "-2", "2", "3", "-9" }; + for( auto event : lheFile.events ) + { + auto currProc = pgdXtract( *event, pdgVec ); + if( procVecContains( procList, *currProc, pdgVec ) ){ continue; } + procList.push_back(currProc); + } + return procList; + } + + // ZW: fcn for extracting the differenty processes + // in a given REX format LHE file in the lheProc format + std::vector> processPull( const lheNode& lheFile ) + { + const static std::vector pdgVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector> procsList{}; + for( auto event : lheFile.events ) + { + auto currProc = std::make_shared( *event ); + if( procListComp( procsList, *currProc, pdgVec ) ){ continue; } + procsList.push_back( currProc ); + } + return procsList; + } + + // ZW: fcn for keeping track of subprocess ordering + // in LHE file + int procPos( const std::vector>& evtSet, lheProc& currProc, + const std::vector& pdgVec ) + { + for( auto k = 0 ; k < evtSet.size() ; ++k ) + { + for( auto stat : pdgVec ) + { + if( evtSet[k]->valVecs[stat] != currProc.valVecs[stat] ){ break; } + } + return k; + } + return evtSet.size(); + } + + // ZW: fcn for extracting the subprocess ordering + // of LHE file + std::vector>> procOrder( const lheNode& lheFile, const std::vector>& evtSet ) + { + const static std::vector pdgVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector>> eventBools( evtSet.size()); + std::vector> pracBools( evtSet.size(), std::vector ( lheFile.events.size() )); + for( auto boolSets : pracBools ){ + std::fill( boolSets.begin(), 
boolSets.end(), false ); + } + for( auto k = 0 ; k < lheFile.events.size() ; ++k ) + { + auto currProc = lheProc(*lheFile.events[k]); + pracBools[ procPos(evtSet, currProc, pdgVec) ][ k ] = true; + } + for( int k = 0 ; k < eventBools.size() ; ++k ) + { + eventBools[k] = std::make_shared>( pracBools[k] ); + } + return eventBools; + } + + // ZW: fcn for reordering LHE file based on subprocess + std::shared_ptr>> eventReOrder( const lheNode& lheFile, std::vector relProc ) + { + auto reOrdered = std::make_shared>>(); + reOrdered->reserve( std::count( relProc.begin(), relProc.end(), true ) ); + for( int k = 0 ; k < relProc.size() ; ++k ) + { + if(!relProc[k]){continue;} + reOrdered->push_back( lheFile.events[k] ); + } + return reOrdered; + } + + // ZW: wrapper for eventReOrder + std::vector>>> lheReOrder( const lheNode& lheFile ) + { + auto procSets = processPull( lheFile ); + auto relProcs = procOrder( lheFile, procSets ); + std::vector>>> ordProcs(procSets.size()); + for( int k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + // ZW: transposed event information struct + struct evtInfo { + public: + std::vector wgts; + std::vector scales; + std::vector aQEDs; + std::vector aQCDs; + std::vector nprts; + std::vector procIDs; + evtInfo( const std::vector>& lheFile = {} ){ + int nEvt = lheFile.size(); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); procIDs.reserve(nEvt); + for( auto evt : lheFile ) + { + wgts.push_back(evt->getHead().getWeight()); + scales.push_back(evt->getHead().getScale()); + aQEDs.push_back(evt->getHead().getAQED()); + aQCDs.push_back(evt->getHead().getAQCD()); + nprts.push_back(evt->getHead().getNprt()); + procIDs.push_back(evt->getHead().getProcID()); + } + } + }; + + // ZW: transposed particle information struct + struct prtInfo { + public: + std::vector moms; + std::vector masses; + std::vector vtims; + std::vector spins; + std::vector 
statuses; + std::vector mothers; + std::vector icols; + std::vector pdgs; + prtInfo( const std::vector>& lheFile = {}, const int nPrt = 8 ){ + int nEvt = lheFile.size(); + moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); + spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); + for( auto evt : lheFile ) + { + for( auto prt : evt->getPrts() ) + { + moms.push_back( prt->getE() ); + masses.push_back( prt->getMass() ); + vtims.push_back( prt->getVTim() ); + spins.push_back( prt->getSpin() ); + statuses.push_back( prt->getStatus() ); + pdgs.push_back( prt->getPDG() ); + for( int k = 0 ; k < 2 ; ++k ) + { + moms.push_back( prt->getMom()[k] ); + mothers.push_back( prt->getMothers()[k] ); + icols.push_back( prt->getColor()[k] ); + } + moms.push_back( prt->getMom()[2] ); + } + } + } + }; + + // ZW: transposed LHE file with a single process type + struct transMonoLHE { + public: + evtInfo evtsHead; + prtInfo evtsData; + transMonoLHE( const std::vector>& lheFile = {}, const int nPrt = 8 ){ + evtsHead = evtInfo(lheFile); + evtsData = prtInfo(lheFile, nPrt); + } + }; + + // ZW: transposed LHE file ordered by subprocess + struct transLHE { + public: + std::string_view xmlFile; + std::vector> subProcs; + transLHE( lheNode& lheFile ) + { + xmlFile = lheFile.getFile(); + auto procsOrdered = lheReOrder( lheFile ); + subProcs = std::vector>( procsOrdered.size() ); + for( int k = 0 ; k < procsOrdered.size() ; ++k ) + { + subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt() ); + } + } + }; + + // ZW: vector transformation string_to_double + std::shared_ptr> vecStoD( const std::vector dataVec ) + { + auto valVec = std::make_shared>( dataVec.size() ); + std::transform( dataVec.begin(), dataVec.end(), valVec->begin(), []( const std::string_view& stv ){ + return std::stod(std::string(stv)); + } ); + return valVec; + } + + // ZW: vector 
transformation string_to_int + std::shared_ptr> vecStoI( const std::vector dataVec ) + { + auto valVec = std::make_shared>( dataVec.size() ); + std::transform( dataVec.begin(), dataVec.end(), valVec->begin(), []( const std::string_view& stv ){ + return std::stoi(std::string(stv)); + } ); + return valVec; + } + + // ZW: templated fcn for multiplying two vectors elementwise, + // assuming T has a multiplication operator* + template + std::shared_ptr> vecElemMult( const std::vector& vec1, const std::vector& vec2){ + if( vec1.size() < vec2.size() ){ return vecElemMult( vec2, vec1 ); } + auto valVec = std::make_shared>( vec1.size() ); + std::transform( vec1.begin(), vec1.end(), vec2.begin(), valVec->begin(), []( const T& v1, const T& v2 ){ + return v1 * v2; + } ); + return valVec; + } + + // ZW: bool struct to define which double values + // to extract transposed from LHE file + struct lheRetDs{ + public: + bool ebmup = false; + bool xsecup = false; + bool xerrup = false; + bool xmaxup = false; + bool xwgtup = false; + bool scalup = false; + bool aqedup = false; + bool aqcdup = false; + bool pup = true; + bool mass = false; + bool vtimup = false; + bool spinup = false; + std::vector getBools(){ + return { ebmup, xsecup, xerrup, xmaxup, xwgtup, scalup, aqedup, aqcdup, + pup, mass, vtimup, spinup }; + } + }; + + // ZW: bool struct to define which int values + // to extract transposed from LHE file + struct lheRetInts{ + public: + //bool maxpup = false; + bool idbmup = false; + bool pdfgup = false; + bool pdfsup = false; + bool idwtup = false; + bool nprup = false; + bool lprup = false; + //bool maxnup = false; + bool nup = true; + bool idprup = false; + bool idup = true; + bool istup = true; + bool mothup = false; + bool icolup = false; + std::vector getBools(){ + return { idbmup, pdfgup, pdfsup, idwtup, nprup, lprup, + nup, idprup, idup, istup, mothup, icolup }; + } + }; + + // ZW: function for extracting transposed double values + // from LHE file + std::shared_ptr>>> 
lheValDoubles( lheNode& lheFile, lheRetDs vals = lheRetDs() ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.init->getHead()->ebmup[0], lheFile.init->getHead()->ebmup[1] } ); ++currInd; } + if( boolVec[1] ){ + std::vector xsecVec( lheFile.init->getLines().size() ); + for( auto line : lheFile.init->getLines() ) + { + xsecVec.push_back(line->xsecup); + } + lheDs[currInd] = vecStoD( xsecVec ); + ++currInd; } + if( boolVec[2] ){ + std::vector xerrVec( lheFile.init->getLines().size() ); + for( auto line : lheFile.init->getLines() ) + { + xerrVec.push_back(line->xerrup); + } + lheDs[currInd] = vecStoD( xerrVec ); + ++currInd; } + if( boolVec[3] ){ + std::vector xmaxVec( lheFile.init->getLines().size() ); + for( auto line : lheFile.init->getLines() ) + { + xmaxVec.push_back(line->xmaxup); + } + lheDs[currInd] = vecStoD( xmaxVec ); + ++currInd; } + for( int k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. 
* M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + + return lheDos; + } + + // ZW: function for extracting transposed int values + // from LHE file + std::shared_ptr>>> lheValInts( lheNode& lheFile, lheRetInts vals = lheRetInts() ) + { + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile ); + auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheIs; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->idbmup[0], lheFile.init->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->pdfgup[0], lheFile.init->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->pdfsup[0], lheFile.init->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->nprup } ); ++currInd; } + if( boolVec[5] ){ + std::vector lprVec( lheFile.init->getLines().size() ); + for( auto line : lheFile.init->getLines() ) + { + lprVec.push_back(line->lprup); + } + lheDs[currInd] = vecStoI( lprVec ); + ++currInd; } + for( int k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } + if( boolVec[8] 
){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } + } + return lheIs; + } +} \ No newline at end of file diff --git a/tools/REX/pepper.cu b/tools/REX/pepper.cu new file mode 100644 index 0000000000..b49c20fb16 --- /dev/null +++ b/tools/REX/pepper.cu @@ -0,0 +1,169 @@ +#include "PEPPER.hpp" +#include "fbridge.cc" +#include +#include + +struct fbridgeRunner{ + std::vector rndHel; + std::vector rndCol; + std::vector selHel; + std::vector selCol; + CppObjectInFortran *fBridge; + const unsigned int chanId = 0; + const unsigned int nMom = 4; + unsigned int nEvt; + unsigned int nPar; + fbrideRunner(){} + fbridgeRunner( PEP::lheNode& lheFile ){ + if( !lheFile.isParsed() ){ lheFile.deepParse(); } + nEvt = lheFile.events.size(); + rndHel = std::vector( nEvt, 0. ); + rndCol = std::vector( nEvt, 0. ); + selHel = std::vector( nEvt, 0 ); + selCol = std::vector( nEvt, 0 ); + nPar = lheFile.events[0]->getPrts().size(); + } + fbridgeRunner( std::shared_ptr lheFile ){ + if(!lheFile->isParsed() ){ lheFile->deepParse(); } + nEvt = lheFile->events.size(); + rndHel = std::vector( nEvt, 0. ); + rndCol = std::vector( nEvt, 0. 
); + selHel = std::vector( nEvt, 0 ); + selCol = std::vector( nEvt, 0 ); + nPar = lheFile->events[0]->getPrts().size(); + } + std::shared_ptr> scatAmp( std::shared_ptr> momenta, std::shared_ptr> alphaS ){ + std::shared_ptr> evalScatAmps( nEvt ); + fbridgecreate_( &fBridge, &nEvt, &nPar, &nMom ); + fbridgesequence_( &fBridge, &momenta->at(0), &alphaS->at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgedelete_( &fBridge ); + return evalScatAmps; + } + std::shared_ptr> scatAmp( std::shared_ptr> momenta, std::shared_ptr> alphaS ){ + if( typeid(FORTRANFPTYPE(0)) == typeid(float(0)) ){ + std::shared_ptr> nuMom( nEvt ); + std::shared_ptr> nuAlphaS( nEvt ); + std::transform( momenta->begin(), momenta->end(), nuMom->begin(), [](double mom){ return static_cast(mom); }) + std::transform( alphaS->begin(), alphaS->end(), nuAlphaS->begin(), [](double gs){ return static_cast(gs); }); + return scatAmp( nuMom, nuAlphaS ); + } + std::shared_ptr> evalScatAmps( nEvt ); + fbridgecreate_( &fBridge, &nEvt, &nPar, &nMom ); + fbridgesequence_( &fBridge, &momenta->at(0), &alphaS->at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgedelete_( &fBridge ); + return evalScatAmps; + } + std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ + std::shared_ptr> evalScatAmps( nEvt ); + fbridgecreate_( &fBridge, &nEvt, &nPar, &nMom ); + fbridgesequence_( &fBridge, &momenta[0], &alphaS[0], &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgedelete_( &fBridge ); + return evalScatAmps; + } + std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ + if( typeid(FORTRANFPTYPE(0)) == typeid(float(0)) ){ + auto nuMom = std::vector( nEvt ); + auto nuAlphaS = std::vector( nEvt ); + std::transform( momenta.begin(), momenta.end(), nuMom.begin(), [](double mom){ return static_cast(mom); }) + std::transform( alphaS.begin(), alphaS.end(), nuAlphaS.begin(), 
[](double gs){ return static_cast(gs); }); + return scatAmp( nuMom, nuAlphaS ); + } + auto evalScatAmps = std::shared_ptr>( nEvt ); + fbridgecreate_( &fBridge, &nEvt, &nPar, &nMom ); + fbridgesequence_( &fBridge, &momenta[0], &alphaS[0], &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgedelete_( &fBridge ); + return evalScatAmps; + } +}; + +std::shared_ptr> meEval( std::vector& x, std::vector& y){ + int random = rand() % 10; + if( random == 0 ){ random = 11; } + auto thisIsIt = std::make_shared>( y.size(), random ); + return thisIsIt; +} + +int usage( char* argv0, int ret = 1 ) +{ + std::cout << "Usage: " << argv0 + << " [--lhefile=\"/YOUR/PATH/HERE\"|-lhe=\"/YOUR/PATH/HERE\"] [--rwgtcard=/YOUR/PATH/HERE|-rwgt=\"/YOUR/PATH/HERE\"]\n" + << "[--output=/YOUR/PATH/HERE\"|-out=\"/YOUR/PATH/HERE\"]\n"; + std::cout << "\n"; + std::cout << "The LHE file path should be with respect to the directory you are running\n"; + std::cout << "this program from, and similarly the rwgt_card should be as well.\n"; + if( typeid(FORTRANFPTYPE(0)) == typeid(double(0)) ){ + std::cout << "The program is currently compiled with double precision.\n"; + } else if( typeid(FORTRANFPTYPE(0)) == typeid(float(0)) ){ + std::cout << "The program is currently compiled with float precision.\n"; + } else{ std::cout << "The program is currently compiled with an unrecognised precision -- FPTYPE is neither float nor double.\n"; } + std::cout << "Numerical precision can only be redefined at compile time.\nIf you wish to change the precision, please recompile with the option \"FPTYPE=f\"/\"FPTYPE=d\"."; + return ret; +} + + +int main( int argc, char** argv ){ + std::string lheFilePath; + std::string rwgtCardPath; + std::string outputPath; + std::string slhaPath; + + // READ COMMAND LINE ARGUMENTS + for( auto arg : argv ) + { + auto currArg = std::string( arg ); + if( currArg.substr(0,9) == "--lhefile" || currArg.substr(0,4) == "-lhe" ) + { + lheFilePath = 
currArg.substr( currArg.find( "=" ) + 1 ); + } + else if( currArg.substr(0,10) == "--rwgtcard" || currArg.substr(0,5) == "-rwgt" ) + { + rwgtCardPath = currArg.substr( currArg.find( "=" ) + 1 ); + } else if( currArg.substr(0,8) == "--output" || currArg.substr(0,4) == "-out" ){ + outputPath = currArg.substr( currArg.find( "=" ) + 1 ); + } else + { + return usage( argv[0] ); + } + } + + if( lheFilePath.empty() || rwgtCardPath.empty() ){ + return usage( argv[0] ); + } + + std::string currPath = argv[0]; + + size_t slashPos = currPath.find_last_of( "/" ); + bool onWindows = false; + if( slashPos == std::string::npos ){ slashPos = currpath.find_last_of( "\\" ); onWindows = true; } + if( slashPos == std::string::npos ) + throw std::runtime_error( "Failed to determine current working directory -- need to know where program is run from to identify where to pull and push param_card.dat." ); + + if( onWindows ){ + if( currPath.substr( currPath.find_last_of("\\", slashPos - 1) + 1, 2 ) == "P1" ){ + slhaPath = "..\\..\\Cards\\param_card.dat"; + } else{ + slhaPath = "\\Cards\\param_card.dat"; + } + } else { + if( currPath.substr( currPath.find_last_of("/", slashPos - 1) + 1, 2 ) == "P1" ){ + slhaPath = "../../Cards/param_card.dat"; + } else { + slhaPath = "/Cards/param_card.dat"; + } + } + + + PEP::PER::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); + fileCol.initCards(); + + auto bridgeCont = fbridgeRunner( fileCol.getLhe() ); + + std::function>( std::vector&, std::vector& )> scatteringAmplitude = bridgeCont.scatAmp; + PEP::PER::rwgtRunner nuRun( fileCol, scatteringAmplitude ); + + + nuRun.runRwgt( outputPath ); + + return 0; + +} \ No newline at end of file diff --git a/tools/REX/teawREX.hpp b/tools/REX/teawREX.hpp new file mode 100644 index 0000000000..5c2eb2d3cd --- /dev/null +++ b/tools/REX/teawREX.hpp @@ -0,0 +1,470 @@ +/*** + * _ ______ _______ __ + * | | | ___ \ ___\ \ / / + * | |_ ___ __ ___ _| |_/ / |__ \ V / + * | __/ _ \/ _` \ \ /\ / / /| __| / \ + * | || 
__/ (_| |\ V V /| |\ \| |___/ /^\ \ + * \__\___|\__,_| \_/\_/ \_| \_\____/\/ \/ + * + ***/ + +// THIS IS NOT A LICENSED RELEASE +// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD +// FROM AN IMPROPER RELEASE. + +// Copyright © 2023 CERN, CERN Author Zenny Wettersten. +// All rights reserved. + +#include +#include +#include +#include +#include +#include +#include "REX.hpp" + +namespace REX::teaw +{ + template + std::shared_ptr> scatAmpEval(std::vector& momenta, std::function>(std::vector&)> evalFunc) + { return evalFunc(momenta); } + + template + std::shared_ptr> scatAmpEval(std::vector& momenta, std::function(std::vector&)> evalFunc) + { return evalFunc(momenta); } + + template + std::shared_ptr> scatAmpEval(std::vector& momenta, std::function>(std::vector&, std::vector&)> evalFunc) + { return evalFunc(momenta); } + + template + std::shared_ptr> scatAmpEval(std::vector& momenta, std::function(std::vector&, std::vector&)> evalFunc) + { return evalFunc(momenta); } + + struct rwgtVal : REX::paramVal{ + public: + std::string_view blockName; + bool allStat; + bool isAll(){ return (idStr == "all"); } + rwgtVal() : paramVal(){ return; } + rwgtVal( std::string_view paramLine ) + : paramVal( paramLine, false ){if( paramLine.size() == 0 ){ return; } + realLine = paramLine; + auto vals = *REX::nuBlankSplitter( realLine ); + blockName = vals[1]; + idStr = vals[2]; + valStr = vals[3]; + } + std::string_view getLine(){ return realLine; } + void outWrite( REX::paramBlock& srcBlock ){ + if ( isAll() ) + { + for( auto param : srcBlock.params ) + { + param.valStr = valStr; + param.modded = true; + } + return; + } + auto currPar = std::find_if( srcBlock.params.begin(), srcBlock.params.end(), + [&]( const REX::paramVal& parPar ){ return (parPar.idStr == idStr ); } ); + if( currPar == srcBlock.params.end() ){ + srcBlock.params.push_back( REX::paramVal( realLine.substr(realLine.find("set") + 4) ) ); + srcBlock.params[ srcBlock.params.size() - 1 ].modded = true; + srcBlock.modded = true; + 
return; + } + currPar->valStr = valStr; + currPar->modded = true; + srcBlock.modded = true; + return; + } + }; + + struct rwgtBlock { + public: + std::string_view name; + std::vector rwgtVals; + rwgtBlock( std::vector values = {}, std::string_view title = "" ) + { + name = title; + rwgtVals.resize( values.size() ); + for( int k = 0 ; k < values.size() ; ++k ) + { + rwgtVals[k] = rwgtVal( values[k] ); + } + } + rwgtBlock( const std::vector& vals, std::string_view title = "" ) + { + name = title; + rwgtVals = vals; + } + std::string_view getBlock(){ + if( written ){ return runBlock; } + runBlock = ""; + for( auto val : rwgtVals ){ + runBlock += std::string(val.getLine()) + "\n"; + } + written = true; + return runBlock; + } + void outWrite( REX::paramBlock& srcBlock, const std::map& blocks ) + { + for( auto parm : rwgtVals ) + { + parm.outWrite( srcBlock ); + } + srcBlock.modded = true; + return; + } + protected: + std::string runBlock; + bool written = false; + }; + + struct rwgtProc { + public: + std::vector rwgtParams; + std::string_view procString; + std::string_view rwgtName; + std::vector rwgtOpts; + void parse(){ + std::vector blocks; + std::vector>> params; + auto procLines = *REX::nuLineSplitter( procString ); + for( auto line : procLines ) + { + auto strtPt = line.find("set"); + auto words = *REX::nuWordSplitter( line ); + auto currBlock = words[1]; + auto loc = std::find_if( blocks.begin(), blocks.end(), + [&]( std::string_view block ){ return (block == currBlock); } ); + if( loc == blocks.end() ){ + blocks.push_back( currBlock ); + params.push_back( std::make_shared>( std::vector({rwgtVal( line )} ) )); } + else { + params[ std::distance( blocks.begin(), loc ) - 1 ]->push_back( rwgtVal( line ) ); + } + } + rwgtParams.reserve(blocks.size()); + for( int k = 0 ; k < blocks.size() ; ++k ) + { + rwgtParams.push_back( rwgtBlock( *params[k], blocks[k] ) ); + } + } + rwgtProc( REX::lesHouchesCard slhaSet, std::string_view rwgtSet = "", bool parseOnline = false ) + 
{ + if( rwgtSet == "" ){ return; } + auto strtLi = rwgtSet.find( "\n", rwgtSet.find("launch") ) + 1; + auto endLi = rwgtSet.find("\n", strtLi); + while( rwgtSet[rwgtSet.find_first_not_of("\n ", endLi)] == 's' ) + { endLi = rwgtSet.find( "\n", endLi + 1 ); } + procString = rwgtSet.substr( strtLi, endLi - strtLi ); + if( parseOnline ){ parse(); } + } + std::shared_ptr outWrite( const REX::lesHouchesCard& paramOrig ){ + auto slhaOrig = std::make_shared( paramOrig ); + std::map blockIds; + for( int k = 0 ; k < slhaOrig->blocks.size() ; ++k ) + { slhaOrig->blocks[k].parse( true ); + auto nyama = std::pair( slhaOrig->blocks[k].name, k); + blockIds.insert( nyama ); } + for( auto rwgts : rwgtParams ) + { rwgts.outWrite( slhaOrig->blocks[ blockIds.at( rwgts.name ) ], blockIds ); } + slhaOrig->modded = true; + return slhaOrig; + } + std::string_view comRunProc(){ return procString; } + }; + + struct rwgtCard{ + public: + REX::lesHouchesCard slhaCard; + std::vector rwgtRuns; + std::vector rwgtProcs; + std::vector opts; + std::vector rwgtNames; + std::string_view srcCard; + void parse( bool parseOnline = false ) { + auto strt = srcCard.find("launch"); + while( auto commPos = srcCard.find_last_of("#", strt) > srcCard.find_last_of("\n", strt) ){ + if( commPos == REX::npos ){ + break; + } + strt = srcCard.find("launch", strt + 6 ); + } + while( auto chPos = srcCard.find( "set" ) < strt ){ + if( srcCard.find_last_of("#", chPos) > srcCard.find_last_of("\n", chPos) ){ chPos = srcCard.find("change", strt + 6 ); continue; } + opts.push_back( srcCard.substr( chPos, srcCard.find("\n", chPos) - chPos ) ); + } + std::vector lnchPos({strt}); + auto nuLnch = srcCard.find( "launch", strt + 6 ); + while ( nuLnch != std::string_view::npos ) + { + if( srcCard.find_last_of("#", nuLnch) < srcCard.find_last_of("\n", nuLnch) ){ lnchPos.push_back(nuLnch); } + nuLnch = srcCard.find( "launch", nuLnch + 6 ); + } + for( int k = 0 ; k < lnchPos.size() - 1 ; ++k ) + { + auto strtLi = srcCard.find( "set", 
lnchPos[k] ); + rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( strtLi, lnchPos[k+1] - strtLi ), parseOnline ) ); + if( srcCard.find( "--", lnchPos[k] ) < strtLi ){ + auto strtPos = srcCard.find( "--", lnchPos[k] ); + while( (strtPos < strtLi ) && (strtPos!= std::string_view::npos) ){ + auto nuStrtPos = std::min( srcCard.find( "\n", strtPos ), srcCard.find( "--", strtPos + 1 )); + rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.push_back( srcCard.substr( strtPos, nuStrtPos - strtPos ) ); + if( rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr(2,11) == "rwgt_name"){ + rwgtRuns[ rwgtRuns.size() - 1 ].rwgtName = rwgtRuns[ rwgtRuns.size() - 1 ]. + rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr( 11, nuStrtPos - strtPos - 11 ); + } + if( nuStrtPos == srcCard.find( "\n", strtPos ) ){ break; } + strtPos = nuStrtPos; + } + } + } + size_t endLi = srcCard.find( "\n", lnchPos[ lnchPos.size() - 1 ] ); + if( srcCard.substr( endLi + 1, 3 ) == "set" ){ + while( srcCard.substr( endLi + 1, 3 ) == "set" ) + { + endLi = srcCard.find( "\n", endLi + 1 ); + } + rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( lnchPos[lnchPos.size()-1], endLi - lnchPos[lnchPos.size()-1] ), parseOnline ) ); + } + rwgtProcs = std::vector(); rwgtProcs.reserve( rwgtRuns.size() ); + rwgtNames.reserve( rwgtRuns.size() ); + int p = 1; + for( auto run : rwgtRuns ){ + rwgtProcs.push_back( run.comRunProc() ); + if( run.rwgtName == "" ){ + rwgtNames.push_back( "rwgt_" + std::to_string( p++ ) ); + } else { + rwgtNames.push_back( std::string(run.rwgtName) ); + } + } + } + rwgtCard( std::string_view reweight_card ){ + srcCard = reweight_card; + } + rwgtCard( std::string_view reweight_card, REX::lesHouchesCard slhaParams, bool parseOnline = false ){ + srcCard = reweight_card; + slhaCard = slhaParams; + if( parseOnline ){ parse( parseOnline ); } + } + std::vector> writeCards( REX::lesHouchesCard& slhaOrig ){ + std::vector> cardVec; + 
slhaOrig.parse(); + cardVec.reserve( rwgtRuns.size() ); + for( auto rwgt : rwgtRuns ) + { + cardVec.push_back( rwgt.outWrite( slhaOrig ) ); + } + return cardVec; + } + }; + + struct rwgtCollection { + public: + void setRwgt( std::shared_ptr rwgts ){ + if( rwgtSet ){ return; } + rwgtSets = rwgts; + rwgtSet = true; + } + void setRwgt( rwgtCard rwgts ){ + if( rwgtSet ){ return; } + setRwgt( std::make_shared( rwgts ) ); rwgtSet = true; + } + void setSlha( std::shared_ptr slha ){ + if( slhaSet ){ return; } + slhaParameters = slha; + slhaParameters->parse(); + slhaSet = true; + } + void setSlha( REX::lesHouchesCard slha ){ + if( slhaSet ){ return; } + setSlha( std::make_shared( slha ) ); + slhaSet = true; + } + void setLhe( std::shared_ptr lhe ){ + if( lheFileSet ){ return; } + lheFile = lhe; + lheFileSet = true; + } + void setLhe( REX::lheNode lhe ){ + if( lheFileSet ){ return; } + setLhe( std::make_shared( lhe ) ); + lheFileSet = true; + } + void setLhe( std::string_view lhe_file ){ + if( lheFileSet ){ return; } + size_t strt = 0; + size_t post = *REX::nodeEndFind( lhe_file, strt ); + lheFile = REX::lheParser( lhe_file, strt, post ); + lheFileSet = true; + } + std::shared_ptr getRwgt(){ return rwgtSets; } + std::shared_ptr getSlha(){ return slhaParameters; } + std::shared_ptr getLhe(){ return lheFile; } + rwgtCollection(){ return; } + rwgtCollection( std::shared_ptr lhe, std::shared_ptr slha, std::shared_ptr rwgts ){ + setLhe( lhe ); + setSlha( slha ); + setRwgt( rwgts ); + } + protected: + void setDoubles(){ + if( lheFile == nullptr || rwgtSets == nullptr || slhaParameters == nullptr ) + throw std::runtime_error( "One or more of the necessary files (SLHA parameter card, LHE event storage file, and MadGraph-format reweight card) have not been initialised." 
); + REX::lheRetDs returnBools; returnBools.xwgtup = true; returnBools.aqcdup = true; returnBools.pup = true; + auto vecOfVecs = REX::lheValDoubles( *lheFile, returnBools ); + if( vecOfVecs->size() != 3 ) + throw std::runtime_error( "LHE file appears to contain multiple types of processes. This has not yet been implemented." ); + wgts = vecOfVecs->at( 0 ); gS = vecOfVecs->at( 1 ); momenta = vecOfVecs->at( 2 ); + } + std::shared_ptr rwgtSets; + std::shared_ptr slhaParameters; + std::shared_ptr lheFile; + std::shared_ptr> wgts; + std::shared_ptr> gS; + std::shared_ptr> momenta; + bool lheFileSet = false; + bool slhaSet = false; + bool rwgtSet = false; + }; + + struct rwgtFiles : rwgtCollection { + void setRwgtPath( std::string_view path ){ rwgtPath = path; } + void setSlhaPath( std::string_view path ){ slhaPath = path; } + void setLhePath( std::string_view path ){ lhePath = path; } + rwgtFiles() : rwgtCollection(){ return; } + rwgtFiles( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ) : rwgtCollection(){ + setRwgtPath( reweight_card ); + setSlhaPath( slha_card ); + setLhePath( lhe_card ); + } + void initCards(){ + if( rwgtPath == "" || slhaPath == "" || lhePath == "" ) + throw std::runtime_error( "Paths to reweight card, parameter card, or LHE file have not been set" ); + pullRwgt(); pullSlha(); pullLhe(); + setLhe( *lheCard ); + setSlha( std::make_shared( *slhaCard ) ); + setRwgt( std::make_shared( *rewgtCard, *slhaParameters, true ) ); + setDoubles(); + } + void initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ){ + setLhePath( lhe_card ); + setSlhaPath( slha_card ); + setRwgtPath( reweight_card ); + initCards(); + } + protected: + void pullRwgt(){ + rewgtCard = REX::filePuller( rwgtPath ); + } + void pullSlha(){ + slhaCard = REX::filePuller( slhaPath ); + } + void pullLhe(){ + lheCard = REX::filePuller( lhePath ); + } + std::string rwgtPath; + std::string lhePath; + std::string 
slhaPath; + std::shared_ptr lheCard; + std::shared_ptr slhaCard; + std::shared_ptr rewgtCard; + }; + + struct rwgtRunner : rwgtFiles{ + public: + void setMeEval( std::function>(std::vector&, std::vector&)> eval ){ meEval = eval; meInit = true; } + rwgtRunner() : rwgtFiles(){ return; } + rwgtRunner( rwgtFiles& rwgts ) : rwgtFiles( rwgts ){ return; } + rwgtRunner( rwgtFiles& rwgts, std::function>(std::vector&, std::vector&)> meCalc ) : rwgtFiles( rwgts ){ + meEval = meCalc; + meInit = true; + } + rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + std::function>(std::vector&, std::vector&)> meCalc ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ + meEval = meCalc; + meInit = true; + } + protected: + bool meInit = false; + bool meSet = false; + bool normWgtSet = false; + std::function>(std::vector&, std::vector&)> meEval; + std::shared_ptr> initMEs; + std::shared_ptr> meNormWgts; + std::shared_ptr rwgtGroup; + void setMEs(){ + initCards(); + if( !meInit ) + throw std::runtime_error( "No function for evaluating scattering amplitudes has been provided." ); + auto ins = meEval( *momenta, *gS ); + initMEs = std::make_shared>( ins->begin(), ins->begin() + wgts->size() ); + meSet = true; + } + bool setParamCard( std::shared_ptr slhaParams ){ + if( slhaPath == "" ) + throw std::runtime_error( "No parameter card path has been provided." ); + if( slhaParameters == nullptr ) + throw std::runtime_error( "No SLHA parameter card has been provided." ); + if( !REX::filePusher( slhaPath, *slhaParams->selfWrite() ) ) + throw std::runtime_error( "Failed to overwrite parameter card." ); + return true; + } + void setNormWgts(){ + if( !meSet ){ setMEs(); } + if( initMEs->size() != wgts->size() ) + throw std::runtime_error( "Inconsistent number of events and event weights." 
); + meNormWgts = std::make_shared>( wgts->size() ); + for( size_t k = 0; k < initMEs->size(); k++ ){ + meNormWgts->at( k ) = wgts->at( k ) / initMEs->at( k ); + } + normWgtSet = true; + } + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId ){ + if( !normWgtSet ) + throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); + if( !setParamCard( slhaParams ) ) + throw std::runtime_error( "Failed to rewrite parameter card." ); + auto newMEs = meEval( *momenta, *gS ); + auto newWGTs = REX::vecElemMult( *newMEs, *meNormWgts ); + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); + lheFile->addWgt( 0, nuWgt ); + return true; + } + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, std::string& id ){ + if( !normWgtSet ) + throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); + if( !setParamCard( slhaParams ) ) + throw std::runtime_error( "Failed to rewrite parameter card." ); + auto newMEs = meEval( *momenta, *gS ); + auto newWGTs = REX::vecElemMult( *newMEs, *meNormWgts ); + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); + lheFile->addWgt( 0, nuWgt ); + return true; + } + bool lheFileWriter( std::shared_ptr lheFile, std::string outputDir = "rwgt_evts.lhe" ){ + bool writeSuccess = REX::filePusher( outputDir, *lheFile->nodeWriter() ); + if( !writeSuccess ) + throw std::runtime_error( "Failed to write LHE file." 
); + return true; + } + public: + void runRwgt( const std::string& output ){ + setMEs(); + setNormWgts(); + rwgtGroup = std::make_shared(); + auto currInd = lheFile->header->addWgtGroup( rwgtGroup ); + auto paramSets = rwgtSets->writeCards( *slhaParameters ); + for( int k = 0 ; k < paramSets.size(); k++ ){ + singleRwgtIter( paramSets[k], lheFile, k, rwgtSets->rwgtNames[k] ); + std::cout << "."; + } + lheFileWriter( lheFile, output ); + REX::filePusher( slhaPath, *slhaCard ); + std::cout << "\nReweighting done.\n"; + } + }; +} \ No newline at end of file From ce4e5361d14fd21213cc4946860c1c4e29e4d986 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Tue, 27 Feb 2024 09:19:24 +0100 Subject: [PATCH 02/76] major changes to REX and teawREX, plus first base for template runfiles for MG reweighting --- tools/REX/REX.hpp | 1923 +++++++++++++++++++++++++++++++++---- tools/REX/rwgt_driver.cc | 115 +++ tools/REX/rwgt_instance.h | 69 ++ tools/REX/rwgt_runner.cc | 134 +++ tools/REX/teawREX.hpp | 247 ++++- 5 files changed, 2232 insertions(+), 256 deletions(-) create mode 100644 tools/REX/rwgt_driver.cc create mode 100644 tools/REX/rwgt_instance.h create mode 100644 tools/REX/rwgt_runner.cc diff --git a/tools/REX/REX.hpp b/tools/REX/REX.hpp index 703f799d95..c97f3e6a27 100644 --- a/tools/REX/REX.hpp +++ b/tools/REX/REX.hpp @@ -12,9 +12,12 @@ // IF YOU SEE THIS FILE, IT HAS BEEN SPREAD // FROM AN IMPROPER RELEASE. -// Copyright © 2023 CERN, CERN Author Zenny Wettersten. +// Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. // All rights reserved. 
+#ifndef _REX_HPP_ +#define _REX_HPP_ + #include #include #include @@ -28,7 +31,10 @@ #include #include #include +#include #include +#include +#include // ZW: all fcns within the REX standard sit in the // namespace REX @@ -40,6 +46,69 @@ namespace REX #pragma warning( disable : 4101) static const size_t npos = -1; #pragma warning( pop ) + + using sortFcn = std::function>(std::vector)>; + using statSort = std::function>(std::string_view, std::vector)>; + + // ZW: index sorting function, which returs vector + // of the indices of the original vector sorted + // by default in ascending order + // ie, for [5.0, 0.25, 2.0, 9.2] returns [1, 2, 0, 3] + template + std::shared_ptr> indSort(const std::vector &vector, std::function comp = std::less()) + { + auto sorted = std::make_shared>(vector.size()); + std::iota(sorted->begin(), sorted->end(), 0); + std::stable_sort(sorted->begin(), sorted->end(), [&](size_t i, size_t j) { return comp(vector[i], vector[j]); }); + return sorted; + } + + // ZW: wrapper for indSort for comparing string-type arguments representing integers + template + std::shared_ptr> stoiSort(const std::vector &vector) + { + std::function stoicomp = [](const T& i, const T& j) { return std::stoi(std::string(i)) < std::stoi(std::string(j)); }; + return indSort(vector, stoicomp); + } + + // ZW: wrapper for indSort for comparing string-type arguments representing doubles + template + std::shared_ptr> stodSort(const std::vector &vector) + { + std::function stodcomp = [](const T& i, const T& j) { return std::stod(std::string(i)) < std::stod(std::string(j)); }; + return indSort(vector, stodcomp); + } + + // ZW: templated fcn for finding the order of elements in a vector to_sort + // based on their order in a reference vector reference + // Elements not found in reference are represented by npos, + // including if to_sort is longer than reference + template + std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort) { + 
std::unordered_map> indexMap; + + // Populate indexMap with indices from vec1 + for (size_t i = 0; i < reference.size(); ++i) { + indexMap[reference[i]].push(i); + } + + std::shared_ptr> order; + order->reserve(to_sort.size()); // Pre-allocate memory + + for (const auto& elem : to_sort) { + auto it = indexMap.find(elem); + if (it != indexMap.end() && !it->second.empty()) { + order->push_back(it->second.front()); + it->second.pop(); + } else { + // Element in vec2 not found in vec1 + order->push_back(npos); + } + } + + return order; + } + // ZW: minimal fcn for counting the amount of times // a given search term appears in a string int nuStrCount( std::string_view searchString, std::string_view searchTerm ) @@ -79,7 +148,7 @@ namespace REX auto lineBreaks = nuFindEach( currEvt, "\n" ); std::vector trueBreaks; trueBreaks.reserve( lineBreaks->size() ); - for( int k = 0 ; k < lineBreaks->size() - 1 ; ++k ) + for( size_t k = 0 ; k < lineBreaks->size() - 1 ; ++k ) { if( int( (*lineBreaks)[k+1] - (*lineBreaks)[k]) == 1){continue;} trueBreaks.push_back( (*lineBreaks)[k] ); @@ -92,7 +161,7 @@ namespace REX splitLines->push_back( currEvt.substr( startPos + 1, k - startPos - 1) ); startPos = k; } - if( auto strung = currEvt.substr( startPos ).size() > 1 ){ splitLines->push_back( currEvt.substr( startPos ) ); } + if( currEvt.substr( startPos ).size() > 1 ){ splitLines->push_back( currEvt.substr( startPos ) ); } return splitLines; } @@ -105,7 +174,7 @@ namespace REX auto lineBreaks = nuFindEach( currEvt.substr( startPos, endPos - startPos), "\n" ); auto truBreaks = std::make_shared>(); truBreaks->reserve( lineBreaks->size() ); - for( int k = 0 ; k < lineBreaks->size() ; ++k ) + for( size_t k = 0 ; k < lineBreaks->size() ; ++k ) { if( int( (*lineBreaks)[k+1] - (*lineBreaks)[k]) == 1){continue;} truBreaks->push_back( (*lineBreaks)[k] ); @@ -237,26 +306,46 @@ namespace REX // ZW: fcn for finding left angle bracket // indicating the start of a new node in an XML file - 
std::shared_ptr nodeStartFind( std::string_view parseFile, size_t strtPos ) + size_t nodeStartFind( std::string_view parseFile, size_t strtPos ) { - auto retPtr = std::make_shared(parseFile.find("<", strtPos)); - while( parseFile[*retPtr + 1] == '!' || parseFile[*retPtr +1] == '/' || parseFile[*retPtr +1] == '?' ){ - *retPtr = parseFile.find("<", *retPtr +1); + auto retPtr = parseFile.find("<", strtPos); + while( parseFile[retPtr + 1] == '!' || parseFile[retPtr +1] == '/' || parseFile[retPtr +1] == '?' ){ + retPtr = parseFile.find("<", retPtr +1); } return retPtr; } + size_t endNodeStartFind( std::string_view parseFile, size_t strtPos ) + { + return parseFile.find(">", nodeStartFind( parseFile, strtPos )); + } + + std::pair startNodePts( std::string_view parseFile, size_t strtPos ) + { + return { nodeStartFind( parseFile, strtPos ), endNodeStartFind( parseFile, strtPos ) }; + } + // ZW: fcn for finding left angle bracket // indicating an end of a node in an XML file - std::shared_ptr nodeEndFind( std::string_view parseFile, size_t strtPos ) - { - auto retPtr = std::make_shared(parseFile.find("<", strtPos)); - while( parseFile[*retPtr + 1] != '/' ){ - *retPtr = parseFile.find("<", *retPtr +1); - } + size_t nodeEndFind( std::string_view parseFile, size_t strtPos ) + { + auto retPtr = parseFile.find("<", strtPos); + while( parseFile[retPtr + 1] != '/' ){ + retPtr = parseFile.find("<", retPtr +1); + } return retPtr; } + size_t endNodeEndFind( std::string_view parseFile, size_t strtPos ) + { + return parseFile.find(">", nodeEndFind( parseFile, strtPos )); + } + + std::pair endNodePts( std::string_view parseFile, size_t strtPos ) + { + return { nodeEndFind( parseFile, strtPos ), endNodeEndFind( parseFile, strtPos ) }; + } + // ZW: struct for handling tags in XML node opening tags struct xmlTag { public: @@ -291,18 +380,153 @@ namespace REX return tagPtr; } + // ZW: struct for handling the tree structure of XML files, + // essentially just giving the positions of the 
beginning and + // end of each node s.t. the proper node structures can accurately + // detail where children begin and end while allowing for personal + // content between child nodes + struct xmlTree { + public: + xmlTree(){ return; } + xmlTree( std::string_view file ){ + origin = file; + children = std::make_shared>>(); + start = file.find_first_not_of(" \n\r\f\t\v"); + if( file.compare(start, 1, "<") != 0 ) { + faux = true; + contSt = start; + end = std::min( nodeStartFind(file, start), nodeEndFind(file, start) ); + contEnd = end; + initialised = true; + return; + } + if( file.compare(start + 1, 1, "!") == 0 || file.compare(start + 1, 1, "?") == 0 ) { + faux = true; + contSt = start; + contEnd = file.find(">", start + 1); + end = std::min( nodeStartFind(file, contEnd), nodeEndFind(file, contEnd) ); + initialised = true; + return; + } + auto stEnd = file.find(">", start); + if( file.compare(stEnd - 1, 1, "/" ) == 0 ) { + end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); + contSt = npos; + contEnd = npos; + initialised = true; + return; + } + contSt = stEnd + 1; + auto stPos = nodeStartFind(file, start + 1); + stEnd = nodeEndFind(file, start + 1); + contEnd = std::min(stPos, stEnd); + while( stPos < stEnd ) + { + children->push_back( std::make_shared( file, stPos, stEnd ) ); + } + stEnd = endNodeEndFind(file, stEnd); + end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); + initialised = true; + } + xmlTree( std::string_view file, size_t& strt, size_t& nd ){ + origin = file; + children = std::make_shared>>(); + start = file.find_first_not_of(" \n\r\f\t\v", strt); + if( file.compare(start, 1, "<") != 0) { + faux = true; + contSt = start; + strt = nodeStartFind(file, start); + nd = nodeEndFind(file, start); + end = std::min( strt, nd ); + contEnd = end; + initialised = true; + return; + } + if( file.compare(start + 1, 1, "!") == 0 ) { + faux = true; + contSt = start; + contEnd = file.find(">", start + 1); + strt = nodeStartFind(file, contEnd); + nd = 
nodeEndFind(file, contEnd); + end = std::min( strt, nd ); + initialised = true; + return; + } + auto stEnd = file.find(">", start); + if( file.compare(stEnd - 1, 1, "/" ) == 0 ) { + end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); + contSt = npos; + contEnd = npos; + strt = nodeStartFind(file, start); + nd = nodeEndFind(file, start); + initialised = true; + return; + } + contSt = stEnd + 1; + strt = nodeStartFind(file, start + 1); + nd = nodeEndFind(file, start + 1); + contEnd = std::min(strt, nd); + while( strt < nd ) + { + children->push_back( std::make_shared( file, strt, nd ) ); + } + end = file.find_first_not_of(" \n\r\f\t\v", endNodeEndFind(file, nd) + 1); + initialised = true; + strt = end; + nd = nodeEndFind(file, strt); + } + auto& getChildren(){ return children; } + std::string_view& getOrigin(){ return origin; } + size_t getStart(){ return start; } + size_t getEnd(){ return end; } + size_t getContStart(){ return contSt; } + size_t getContEnd(){ return contEnd; } + bool isFaux(){ return faux; } + bool isInit(){ return initialised; } + bool hasChildren(){ return children->size() > 0; } + protected: + std::shared_ptr>> children; // vector of pointers to children nodes + std::string_view origin; + size_t start; // position of opening bracket of node opening + size_t end; // position of final character of ending node, including trailing blankspace + size_t contSt; + size_t contEnd; + bool faux = false; // boolean showing whether this item is a true node or content squeezed between nodes + bool initialised; + }; + // ZW: struct for handling nodes in generic XML files struct xmlNode { public: xmlNode(){ modded = false; return; } xmlNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ){ - modded = false; xmlFile = originFile; start = begin; children = childs; - if( xmlFile.substr(start, 1) != "<" ){ start = *nodeStartFind( xmlFile, size_t(start) ); } - size_t trueStart = xmlFile.find_first_not_of(" ", 
start+1); - name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ ", trueStart) - trueStart ); - if( xmlFile.find( ">", trueStart ) < xmlFile.find( "/", trueStart ) ){ - content = xmlFile.substr( xmlFile.find( ">", trueStart ) + 1, xmlFile.find( "", trueStart ) - 1 ); - } + modded = false; + xmlFile = originFile; + structure = xmlTree( originFile ); + faux = structure.isFaux(); + start = structure.getStart(); + end = structure.getEnd(); + size_t trueStart = xmlFile.find_first_not_of("< \n\r\f\t\v", start+1); + name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ \n\r\f\t\v", trueStart) - trueStart ); + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + for( auto& child : *(structure.getChildren()) ){ + children.push_back( std::make_shared( *child ) ); + } + } + xmlNode( xmlTree &tree ){ + modded = false; + structure = tree; + if( !structure.isInit() ){ return; } + xmlFile = structure.getOrigin(); + faux = structure.isFaux(); + start = structure.getStart(); + end = structure.getEnd(); + size_t trueStart = xmlFile.find_first_not_of("< \n\r\f\t\v", start); + name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ \n\r\f\t\v", trueStart) - trueStart ); + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + for( auto& child : *(structure.getChildren()) ){ + children.push_back( std::make_shared( *child ) ); + } } std::vector> getChildren(){ return children; } std::vector> getTags(){ return tags; } @@ -311,6 +535,7 @@ namespace REX std::string_view getContent(){ return content; } size_t getStart(){ return start; } size_t getEnd(){ return end; } + xmlTree getTree(){ return structure; } virtual bool isModded(){ return modded; } virtual bool isModded( bool deep ){ bool modStat = isModded(); @@ -320,6 +545,8 @@ namespace REX } bool isWritten(){ return written; } bool isParsed(){ return parsed; } + bool isFaux(){ return faux; } + bool hasChildren(){ 
return children.size() > 0; } void setModded( bool mod ){ modded = mod; } bool deepModded(){ return deepMod; } bool deepParse(){ return deepParsed; } @@ -347,32 +574,16 @@ namespace REX } bool parseTop(){ if( xmlFile == "" ){ return false; } + if( isFaux() ){ return true; } size_t eqSgn = xmlFile.find( "=", start ); size_t nodeInitEnd = xmlFile.find( ">", start ); while( eqSgn < nodeInitEnd ){ tags.push_back( xmlTagParser( xmlFile, eqSgn ) ); } return true; } virtual bool parseContent(){ if( xmlFile == "" ){ return false; } - auto firstR = xmlFile.find_first_of( ">/", start ); - auto nodeStrEnd = xmlFile.find(">", firstR); - if( firstR < nodeStrEnd ){ content = ""; end = nodeStrEnd + 2; parsed = true; return true; } - auto endNode = *nodeEndFind( xmlFile, start ); - auto startNode = *nodeStartFind( xmlFile, start + 1 ); - if( startNode > endNode ){end = xmlFile.find( ">", endNode ) + 1; content = xmlFile.substr( xmlFile.find( ">", start ) + 1, endNode - xmlFile.find( ">", start ) - 1 ); return true; } - auto endPt = xmlFile.find( std::string("", start) + 1, startNode - xmlFile.find(">") - 1 ); - end = xmlFile.find( ">", endPt ) + 2; - while( startNode < endNode ){ - auto nextNode = std::make_shared( xmlFile, startNode ); - children.push_back( nextNode ); - int starts = 0; - while( startNode < endNode ) - { - startNode = *nodeStartFind( xmlFile, startNode + 1 ); - ++starts; - } - for( int k = 0 ; k < starts ; ++k ){ endNode = *nodeEndFind( xmlFile, endNode + 1 ); } - if( endNode > end ){ break; } + end = structure.getContEnd(); + for( auto branch : *(structure.getChildren()) ){ + children.push_back( std::make_shared( *branch ) ); } return true; } @@ -393,10 +604,14 @@ namespace REX } return status; } - std::shared_ptr writtenSelf; - bool deepMod = false; + std::string nodeHeader; + std::string nodeContent; + std::string nodeEnd; + xmlTree structure; std::vector> children; std::vector> tags; + std::shared_ptr writtenSelf; + bool deepMod = false; std::string_view 
xmlFile; std::string_view name; std::string_view content; @@ -406,10 +621,9 @@ namespace REX bool written = false; bool parsed = false; bool deepParsed = false; - std::string nodeHeader; - std::string nodeContent; - std::string nodeEnd; + bool faux = false; virtual void headWriter() { + if( isFaux() ){ return; } nodeHeader = "<" + std::string(name) ; for( auto tag : tags ){ nodeHeader += " " + std::string(tag->getId()) + "=\"" + std::string(tag->getVal()) + "\""; @@ -417,10 +631,12 @@ namespace REX nodeHeader += ">"; } virtual void endWriter() { - nodeEnd = "\n"; + if( isFaux() ){ return; } + auto endSt = xmlFile.find_last_of("<", end); + nodeEnd = xmlFile.substr( endSt, end - endSt ); } virtual void contWriter() { - if( children.size() > 0 ){ + if( hasChildren() ){ nodeContent = std::string(content.substr(0, children[0]->start - 1 )); } else { nodeContent = std::string(content); @@ -449,8 +665,6 @@ namespace REX written = true; modded = false; } else if( !isWritten() ){ - endFinder(); - if( start > xmlFile.size() ){ start = 0; } writtenSelf = std::make_shared( xmlFile.substr( start, end - start ) ); written = true; } @@ -461,10 +675,15 @@ namespace REX for( auto child : children ) { child->childCounter( noChilds ); - if( child->end == 0 ){ --noChilds; } + if( child->end == 0 || child->isFaux() ){ --noChilds; } } noChilds += children.size(); - } + } + virtual int childCounter() { + int noChilds = 0; + childCounter( noChilds ); + return noChilds; + } virtual std::shared_ptr nodeWriter() { if( isModded( true ) || !isWritten() ){ fullWriter(); } return writtenSelf; @@ -480,7 +699,7 @@ namespace REX auto currNode = std::make_shared(parseFile, initPos); size_t equalSign = parseFile.find("=", initPos); size_t nodeInitEnd = parseFile.find(">", initPos); - initPos = *nodeStartFind( parseFile, initPos + 1 ); + initPos = nodeStartFind( parseFile, initPos + 1 ); while( equalSign < nodeInitEnd ){ currNode->addTag( xmlTagParser(parseFile, equalSign) ); } @@ -489,14 +708,14 @@ 
namespace REX currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); } - initPos = *nodeStartFind( parseFile, endPos ); - endPos = *nodeEndFind( parseFile, endPos + 1 ); + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, endPos + 1 ); return currNode; } // ZW: struct for handling rwgt parameter sets // in the LHE header initrwgt node - struct headWeight : xmlNode { + struct headWeight : public xmlNode { public: int getId(){ return id; } std::string_view getTag(){ return idTag; } @@ -536,6 +755,36 @@ namespace REX } } } + headWeight( xmlTree& tree ) : xmlNode( tree ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } + headWeight( xmlTree* tree ) : xmlNode( *tree ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } + headWeight( std::shared_ptr tree ) : xmlNode( *tree ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } headWeight( std::string_view paramSet, std::string& idText, unsigned int idNo, const size_t& begin = 0 ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; } @@ -551,6 +800,7 @@ namespace REX if( idTag == "" ){ nodeHeader = ""; return; } if( id == npos ){ nodeHeader = ""; return; } nodeHeader = ""; + return; } nodeHeader = "( nodeHeader + nodeContent + nodeEnd ); - writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); written = true; modded = 
false; } @@ -611,7 +860,7 @@ namespace REX // ZW: struct for handling rwgt groups // in the LHE header initrwgt node - struct weightGroup : xmlNode { + struct weightGroup : public xmlNode { public: bool getIncId(){ return includeId; } void setIncId( bool nuIncId ){ includeId = nuIncId; } @@ -636,6 +885,42 @@ namespace REX } for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } } + weightGroup( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } weightGroup( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) : xmlNode( originFile, begin, childs ){ name = "weightgroup"; @@ -672,6 +957,7 @@ namespace REX } void 
childWriter() override{ for(auto child : children){ + if( child->getName() == "weight" ){ continue; } nodeContent += (*child->nodeWriter()); } } @@ -682,7 +968,7 @@ namespace REX void endWriter() override{ nodeEnd = "\n"; } }; - struct initRwgt : xmlNode { + struct initRwgt : public xmlNode { public: std::vector> getGroups(){ return groups; } size_t noGrps(){ return groups.size(); } @@ -718,6 +1004,14 @@ namespace REX groups.push_back( std::make_shared( *child ) ); } } + initRwgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + name = "initrwgt"; + groups.reserve( children.size() ); + for( auto child : children ){ + groups.push_back( std::make_shared( *child ) ); + } + } initRwgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ parser( true ); name = "initrwgt"; @@ -726,6 +1020,14 @@ namespace REX groups.push_back( std::make_shared( *child ) ); } } + initRwgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + parser( true ); + name = "initrwgt"; + groups.reserve( children.size() ); + for( auto child : children ){ + groups.push_back( std::make_shared( *child ) ); + } + } protected: bool grpIsInit = false; bool grpInit( std::shared_ptr& wgt ){ @@ -755,9 +1057,9 @@ namespace REX } }; - // ZW: struct for handling event + // ZW: struct for handling weights // in event blocks of LHE files - struct bodyWgt : xmlNode { + struct bodyWgt : public xmlNode { public: void setComment( std::string_view nuComment ){ modded = true; comment = nuComment; } void setVal( std::string nuVal ){ modded = true; valS = nuVal; valD = std::stod(valS);} @@ -787,6 +1089,36 @@ namespace REX valS = originFile.substr( strtPt, originFile.find(" ", strtPt) - strtPt ); valD = std::stod( valS ); } + bodyWgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + valS = xmlFile.substr( 
structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } bodyWgt( double value, std::string& idTag ){ setVal( value ); id = idTag; @@ -893,7 +1225,7 @@ namespace REX evHead( const std::string_view originFile, size_t beginLine = 0, size_t endLine = npos ) { if( originFile.size() == 0){ return; } - beginLine = originFile.find_first_not_of("\n ", beginLine); + beginLine = originFile.find_first_not_of("\n \r\f\t\v", beginLine); if( endLine == npos ){ endLine = originFile.find("\n", beginLine ) + 1; } sourceFile = originFile.substr( beginLine, endLine - beginLine ); auto evLine = nuWordSplitter( sourceFile ); @@ -921,7 +1253,7 @@ namespace REX if( !isModded() ){ content = std::make_shared( sourceFile ); return; } auto retText = std::make_shared( " " ); *content = " " + std::string( nprt ); - for( int k = 0 ; k < 8 - procid.length() ; ++k ){ *content += " "; } + for( size_t k = 0 ; k < 8 - procid.length() ; ++k ){ *content += " "; } *content += std::string( procid ) + " " + std::string( weight ) + " " + std::string( scale ) + " " + std::string( aqed ) + " " + std::string( aqcd ); if( comment 
!= "" ){ *content += " # " + std::string( comment ); } *content += "\n"; @@ -962,6 +1294,14 @@ namespace REX return content; } lhePrt(){ return; } + lhePrt( std::pair prtInfo ){ + status = std::to_string( prtInfo.first ); + pdg = std::to_string( prtInfo.second ); + } + lhePrt( std::pair& prtInfo ){ + status = std::to_string( prtInfo.first ); + pdg = std::to_string( prtInfo.second ); + } lhePrt( const std::string_view originFile, const size_t& beginLine = 0, const size_t& endLine = npos ) { sourceFile = originFile.substr( beginLine, endLine - beginLine ); @@ -998,7 +1338,7 @@ namespace REX if( isWritten() && !isModded() ){ return; } if( !isModded() ){ content = std::make_shared( sourceFile ); return; } *content = ""; - for( int k = 0; k < 10 - pdg.length() ; ++k ){ *content += " "; } + for( size_t k = 0; k < 10 - pdg.length() ; ++k ){ *content += " "; } *content += std::string(pdg) + " " + std::string(status); for( auto mum : mothers ){ *content += " " + std::string( mum ); } for( auto col : icol ){ *content += " " + std::string( col ); } @@ -1012,7 +1352,7 @@ namespace REX }; // ZW: struct for handling LHE format event block - struct event : xmlNode { + struct event : public xmlNode { public: evHead getHead(){ return header; } std::vector> getPrts(){ return prts; } @@ -1038,9 +1378,19 @@ namespace REX return modStat; } event(){ return; } + event( std::vector> prtInfo ){ + header.setNprt( std::to_string( prtInfo.size() ) ); + for( auto prt : prtInfo ){ + prts.push_back( std::make_shared( prt ) ); + } + } + event( std::vector> prtInfo ){ + header.setNprt( std::to_string( prtInfo.size() ) ); + prts = prtInfo; + } event( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) : xmlNode(originFile, begin, childs) { - xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); + xmlFile = originFile; start = begin; children = childs; size_t trueStart = 
originFile.find_first_not_of(" \n\r\f\t\v", begin+1); if( trueStart == npos ){ return; } auto vals = lineFinder( originFile.substr( trueStart, originFile.find("<", trueStart + 3 ) - trueStart + 3 )); header = evHead(originFile, vals->at(0) + trueStart, vals->at(1) + trueStart + 1 ); @@ -1052,7 +1402,62 @@ namespace REX } event( const xmlNode& originFile ) : xmlNode( originFile ) { - size_t trueStart = xmlFile.find_first_not_of(" ", start+1); + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", start+1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event( const xmlNode* originFile ) + : xmlNode( *originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event( const std::shared_ptr& originFile ) + : xmlNode( *originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, 
vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event( xmlTree& originFile ) + : xmlNode( originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event( xmlTree* originFile ) + : xmlNode( *originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event( std::shared_ptr originFile ) + : xmlNode( *originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); prts.reserve(vals->size()); @@ -1068,6 +1473,9 @@ namespace REX bool headIsMod(){ return header.isModded(); } + bool isSpecSort() const { return specSorted; } + sortFcn getSortFcn() const { return eventSort; } + statSort getStatSort() const { return specSort; } protected: std::vector> rwgt; std::shared_ptr childRwgt; @@ -1083,7 +1491,65 @@ namespace REX bool bothRwgt(){ return (hasRwgt() && rwgtChild() ); } bool eitherRwgt(){ return (hasRwgt() || rwgtChild() ); } evHead 
header; + bool hasBeenProc = false; std::vector> prts; + std::map> procMap; + std::map> procOrder; + sortFcn eventSort = []( std::vector vec ){ return stodSort( vec ); }; + statSort specSort = []( std::string_view stat, std::vector vec ){ return stodSort( vec ); }; + bool specSorted = false; + bool initProcMap(bool hard = false) + { + if(!hard){ if( procMap.size() > 0 ){ return true; } } + for( auto prt : prts ){ + procMap.insert({prt->getStatus(), std::vector()}); + procOrder.insert({prt->getStatus(), std::vector()}); + } + for( auto prt : prts ){ + procMap[prt->getStatus()].push_back( prt->getPDG() ); + } + for( auto stat = procMap.begin(); stat!= procMap.end(); ++stat ){ + procOrder[stat->first] = *stoiSort( stat->second ); + } + hasBeenProc = true; + return true; + } + bool initProcMap( sortFcn sorter, bool hard = false ) + { + if(!hard){ if( procMap.size() > 0 ){ return true; } } + specSorted = false; + eventSort = sorter; + for( auto prt : prts ){ + procMap.insert({prt->getStatus(), std::vector()}); + procOrder.insert({prt->getStatus(), std::vector()}); + } + for( auto prt : prts ){ + procMap[prt->getStatus()].push_back( prt->getPDG() ); + } + for( auto stat = procMap.begin(); stat!= procMap.end(); ++stat ){ + procOrder[stat->first] = *sorter( stat->second ); + } + hasBeenProc = true; + return true; + } + bool initProcMap( statSort sorter, bool hard = false ) + { + if(!hard){ if( procMap.size() > 0 ){ return true; } } + specSorted = true; + specSort = sorter; + for( auto prt : prts ){ + procMap.insert({prt->getStatus(), std::vector()}); + procOrder.insert({prt->getStatus(), std::vector()}); + } + for( auto prt : prts ){ + procMap[prt->getStatus()].push_back( prt->getPDG() ); + } + for( auto stat = procMap.begin(); stat!= procMap.end(); ++stat ){ + procOrder[stat->first] = *sorter(stat->first, stat->second ); + } + hasBeenProc = true; + return true; + } bool inRwgtChild( std::string_view name ){ for( auto child : childRwgt->getChildren() ){ for( auto tag : 
child->getTags() ){ if(clStringComp(tag->getVal(), name)){ return true; } } @@ -1179,6 +1645,38 @@ namespace REX if( addedWgt ){ appendWgts(); } return writtenSelf; } + auto &getProc(){ + if( initProcMap() ){ return procMap; } + else throw std::runtime_error("Error while parsing event node."); + } + auto &getProcOrder(){ + if( initProcMap() ){ return procOrder; } + else throw std::runtime_error("Error while parsing event node."); + } + auto &getProc() const{ + if ( hasBeenProc ){ return procMap; } + else throw std::runtime_error("Const declaration of event node before it has been procesed."); + } + auto &getProcOrder() const{ + if ( hasBeenProc ){ return procOrder; } + else throw std::runtime_error("Const declaration of event node before it has been procesed."); + } + auto &getProc(sortFcn sorter){ + if( initProcMap(sorter) ){ return procMap; } + else throw std::runtime_error("Error while parsing event node."); + } + auto &getProcOrder(sortFcn sorter){ + if( initProcMap(sorter) ){ return procOrder; } + else throw std::runtime_error("Error while parsing event node."); + } + auto &getProc(statSort sorter){ + if( initProcMap(sorter) ){ return procMap; } + else throw std::runtime_error("Error while parsing event node."); + } + auto &getProcOrder(statSort sorter){ + if( initProcMap(sorter) ){ return procOrder; } + else throw std::runtime_error("Error while parsing event node."); + } }; // ZW: struct for handling the first line of @@ -1322,7 +1820,7 @@ namespace REX // ZW: struct for handling single DECAY line // in SLHA format parameter card - struct decVal : paramVal{ + struct decVal : public paramVal{ public: void parse() override { auto vals = *nuBlankSplitter( realLine ); @@ -1411,7 +1909,7 @@ namespace REX // ZW: struct for handling DECAY lines // in SLHA format parameter card - struct decBlock : paramBlock { + struct decBlock : public paramBlock { public: std::vector decays; void parse( bool parseOnline = false ) override{ @@ -1479,7 +1977,7 @@ namespace REX auto 
blockPts = clFindEach( xmlFile, std::string("\nblock") ); auto decLines = clFindEach( xmlFile, std::string("\ndecay") ); header = xmlFile.substr( start, std::min( blockPts->at(0), decLines->at(0) ) - start ); - for( int k = 0 ; k < blockPts->size() - 1 ; ++k ) + for( size_t k = 0 ; k < blockPts->size() - 1 ; ++k ) { blocks.push_back( paramBlock( xmlFile.substr( blockPts->at(k), blockPts->at(k+1) - blockPts->at(k) ), parseOnline ) ); } @@ -1490,7 +1988,7 @@ namespace REX parsed = true; } lesHouchesCard( const std::string_view originFile = "", const size_t& begin = 0, bool parseOnline = false ){ - xmlFile = originFile; start = begin; size_t trueStart = originFile.find_first_not_of("\n ", begin+1); + xmlFile = originFile; start = begin; modded = false; blockStart = clStringFindIf( xmlFile, std::string("\n"), lambda, start + 1); end = xmlFile.find(" getParameters(){ modded = true; @@ -1534,6 +2032,15 @@ namespace REX slhaNode( std::shared_ptr node, bool parseOnline = false ) : xmlNode( *node ){ parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); } + slhaNode( xmlTree tree, bool parseOnline = false ) : xmlNode( tree ){ + parameterCard = std::make_shared( tree.getOrigin(), tree.getStart(), parseOnline ); + } + slhaNode( std::shared_ptr tree, bool parseOnline = false ) : xmlNode( *tree ){ + parameterCard = std::make_shared( tree->getOrigin(), tree->getStart(), parseOnline ); + } + slhaNode( xmlTree* tree, bool parseOnline = false ) : xmlNode( *tree ){ + parameterCard = std::make_shared( tree->getOrigin(), tree->getStart(), parseOnline ); + } slhaNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ) : xmlNode( originFile, begin ){ if( parse() ){ parameterCard = std::make_shared( content, begin, parseOnline ); pCardInit = true; } @@ -1559,7 +2066,7 @@ namespace REX }; // ZW: struct for handling LHE init nodes - struct initNode : xmlNode { + struct initNode : public xmlNode { public: std::shared_ptr 
getHead(){ return initHead; } std::vector> getLines(){ return initLines; } @@ -1569,8 +2076,31 @@ namespace REX initNode() : xmlNode(){ name = "init"; } initNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ) : xmlNode( originFile, begin ){ - auto strtPt = originFile.find_first_not_of(" \n", originFile.find(">", start+1)); - content = originFile.substr( strtPt, originFile.find(" node, bool parseOnline = false ) : xmlNode( *node ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } + } + initNode( xmlTree tree, bool parseOnline = false ) : xmlNode( tree ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } + } + initNode( std::shared_ptr tree, bool parseOnline = false ) : xmlNode( *tree ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } + } + initNode( xmlTree* tree, bool parseOnline = false ) : xmlNode( *tree ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } } protected: std::shared_ptr initHead; @@ -1580,7 +2110,7 @@ namespace REX auto linebreaks = lineFinder( content ); if( linebreaks->size() == 0 ){ return false; } initHead = std::make_shared(content.substr( 0, linebreaks->at(0) ) ); - for( int k = 0 ; k < linebreaks->size() - 1 ; ++k ){ + for( size_t k = 0 ; k < linebreaks->size() - 1 ; ++k ){ initLines.push_back( std::make_shared( content.substr( linebreaks->at(k), linebreaks->at(k+1) - linebreaks->at(k) ) ) ); } return true; @@ -1595,7 +2125,7 @@ namespace REX }; // ZW: struct for explicitly handling LHE header nodes - struct lheHead : xmlNode { + struct lheHead : public xmlNode { public: size_t addWgtGroup( 
std::shared_ptr& wgtGroup ){ hasRwgt = true; @@ -1614,30 +2144,30 @@ namespace REX } return (rwgtNodes->noGrps() - 1); } - void addWgt( unsigned int index, std::shared_ptr nuWgt ){ - if( index >= (int)rwgtNodes->getGroups().size() ) + void addWgt( size_t index, std::shared_ptr nuWgt ){ + if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." ); hasRwgt = true; modded = true; rwgtNodes->addWgt( index, nuWgt ); } - void addWgt( unsigned int index, headWeight nuWgt ){ - if( index >= (int)rwgtNodes->getGroups().size() ) + void addWgt( size_t index, headWeight nuWgt ){ + if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." ); hasRwgt = true; modded = true; rwgtNodes->addWgt( index, nuWgt ); } - void addWgt( unsigned int index, std::shared_ptr nuWgt, std::string idTagg ){ - if( index >= (int)rwgtNodes->getGroups().size() ) + void addWgt( size_t index, std::shared_ptr nuWgt, std::string idTagg ){ + if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." ); hasRwgt = true; modded = true; nuWgt->setId( idTagg ); rwgtNodes->addWgt( index, nuWgt ); } - void addWgt( unsigned int index, headWeight nuWgt, std::string idTagg ){ - if( index >= (int)rwgtNodes->getGroups().size() ) + void addWgt( size_t index, headWeight nuWgt, std::string idTagg ){ + if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." 
); hasRwgt = true; modded = true; @@ -1656,6 +2186,46 @@ namespace REX : xmlNode(originFile, begin, childs){ xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} + for( auto child : children ){ + if (child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if (child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead( xmlNode& node ) : xmlNode(node){ + for( auto child : node.getChildren() ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead( xmlNode* node ) : xmlNode(*node){ + for( auto child : node->getChildren() ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead( std::shared_ptr node ) : xmlNode( *node ){ + for( auto child : node->getChildren() ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead( xmlTree tree ) : xmlNode( tree ){ + for( auto child : children ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead( std::shared_ptr tree ) : xmlNode( *tree ){ + for( auto child : children ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead( xmlTree* tree ) : xmlNode( 
*tree ){ + for( auto child : children ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } } protected: bool wgtGrpIsInit = false; @@ -1677,7 +2247,7 @@ namespace REX void setRelChild(){ if( relChildSet ){ return; } relChild.reserve( children.size() ); - for( int k = 0 ; k < children.size() ; ++k ){ + for( size_t k = 0 ; k < children.size() ; ++k ){ auto child = &children[k]; if( (*child)->getName() == "slha" ){ continue; } if( (*child)->getName() == "initrwgt" ){ continue; } @@ -1787,17 +2357,22 @@ namespace REX }; // ZW: general struct for handling LHE files explicitly - struct lheNode : xmlNode { + struct lheNode : public xmlNode { public: - std::vector> events = {}; - std::shared_ptr header = std::make_shared(xmlFile, start); - std::shared_ptr init = std::make_shared(xmlFile, start); lheNode() : xmlNode(){} lheNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) : xmlNode(originFile, begin, childs){ - xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); - if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} + //xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); + //if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} + for( auto child : children ){ + if( child->getName() == "header" ){ header = std::make_shared( *child ); continue; } + if( child->getName() == "init" ){ init = std::make_shared( *child ); continue; } + if( child->getName() == "event" ){ events.push_back( std::make_shared( *child ) ); continue; } + } } + auto getHeader(){ return header; } + auto getInit(){ return init; } + auto& getEvents(){ return 
events; } bool isModded() override{ return modded; } bool isModded( bool deep ) override{ if( !deep ){ return isModded(); } @@ -1806,21 +2381,47 @@ namespace REX for( auto event : events ){ modStat = ( modStat || event->isModded( deep ) ); } return modStat; } + void setInit( std::shared_ptr initNod ){ init = initNod; } + void setHeader( std::shared_ptr headNod ){ header = headNod; } void addWgt( size_t index, newWgt& addedWgt ){ header->addWgt( index, addedWgt.getHeadWgt() ); auto wgtsVec = addedWgt.getBodyWgts(); - for( int k = 0 ; k < wgtsVec.size() ; ++k ){ + for( size_t k = 0 ; k < wgtsVec.size() ; ++k ){ events[k]->addWgt( wgtsVec[k] ); } } void addWgt( size_t index, newWgt& addedWgt, std::string& idTag ){ header->addWgt( index, addedWgt.getHeadWgt(), idTag ); auto wgtsVec = addedWgt.getBodyWgts(); - for( int k = 0 ; k < wgtsVec.size() ; ++k ){ + for( size_t k = 0 ; k < wgtsVec.size() ; ++k ){ events[k]->addWgt( wgtsVec[k] ); } } + void setRelStats( std::vector& particles ){ + relStat = particles; + } + std::vector& getRelStats(){ + return relStat; + } + void setSameSort( sortFcn& sortF ){ + particleSort = sortF; + } + sortFcn& getSameSort(){ + return particleSort; + } + void setStatSort( statSort& statS ){ + statParticleSort = statS; + } + statSort& getStatSort(){ + return statParticleSort; + } protected: + std::vector> events = {}; + std::shared_ptr header = std::make_shared(xmlFile, start); + std::shared_ptr init = std::make_shared(xmlFile, start); + std::vector relStat = {"-1", "1"}; + sortFcn particleSort = []( std::vector prts ){ return stodSort(prts); }; + statSort statParticleSort = []( std::string_view dummy, std::vector prts ){ return stodSort(prts); }; virtual void headerWriter(){ nodeContent += "\n" + *header->nodeWriter(); } @@ -1860,17 +2461,18 @@ namespace REX // ZW: function for extracting event information from // LHE files - std::vector>> valExtraction( const lheNode& lheFile ) + std::vector>> valExtraction( lheNode& lheFile ) { bool getGs = 
true; auto momVec = std::make_shared>(); auto wgtVec = std::make_shared>(); auto gVec = std::make_shared>(); - momVec->reserve( lheFile.events.size() * 4 * std::stoi(std::string(lheFile.events[0]->getHead().getNprt())) ); - wgtVec->reserve( lheFile.events.size() ); - gVec->reserve( lheFile.events.size() ); + auto events = lheFile.getEvents(); + momVec->reserve( events.size() * 4 * std::stoi(std::string(events[0]->getHead().getNprt())) ); + wgtVec->reserve( events.size() ); + gVec->reserve( events.size() ); if( getGs ){ - for( auto event : lheFile.events ) + for( auto event : events ) { wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); gVec->push_back( std::sqrt( 4.0 * M_PI * std::stod(std::string( event->getHead().getAQCD() )))); @@ -1882,7 +2484,7 @@ namespace REX } } } else{ - for( auto event : lheFile.events ) + for( auto event : events ) { wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); gVec->push_back( std::stod(std::string( event->getHead().getAQCD() ))); @@ -1902,7 +2504,7 @@ namespace REX std::shared_ptr evPtrParsor( std::string_view parseFile, size_t& initPos, size_t& endPos ) { auto currNode = std::make_shared(parseFile, initPos); - initPos = *nodeStartFind( parseFile, initPos + 1 ); + initPos = nodeStartFind( parseFile, initPos + 1 ); while( initPos < endPos ) { currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); @@ -1912,8 +2514,8 @@ namespace REX while( equalSign < nodeInitEnd ){ currNode->addTag( xmlTagParser(parseFile, equalSign) ); } - initPos = *nodeStartFind( parseFile, endPos ); - endPos = *nodeEndFind( parseFile, endPos + 1 ); + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, endPos + 1 ); return currNode; } @@ -1922,14 +2524,12 @@ namespace REX std::shared_ptr lheHeadParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) { auto currNode = std::make_shared(parseFile, initPos); - initPos = *nodeStartFind( parseFile, initPos + 1 ); + 
initPos = nodeStartFind( parseFile, initPos + 1 ); while( initPos < endPos ) { - auto nuStrtPos = *nodeStartFind( parseFile, initPos); currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "init" ){ continue; } if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "slha" ){ - auto nuLine = parseFile.find("\n", parseFile.find("<", initPos)); currNode->setParameters( std::make_shared(currNode->getChildren()[ currNode->getChildren().size() - 1 ]) ); } if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "initrwgt" ){ @@ -1941,8 +2541,8 @@ namespace REX while( equalSign < nodeInitEnd ){ currNode->addTag( xmlTagParser(parseFile, equalSign) ); } - initPos = *nodeStartFind( parseFile, endPos ); - endPos = *nodeEndFind( parseFile, endPos + 1 ); + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, endPos + 1 ); return currNode; } @@ -1951,32 +2551,19 @@ namespace REX std::shared_ptr lheParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) { auto currNode = std::make_shared(parseFile, initPos); - initPos = *nodeStartFind( parseFile, initPos + 1 ); + initPos = nodeStartFind( parseFile, initPos + 1 ); while( initPos < endPos ) { - auto nuStrtPos = *nodeStartFind( parseFile, initPos); - //if( nuStrtPos == parseFile.find("events.push_back( evPtrParsor( parseFile, initPos, endPos ) ); - // continue; - //} else if( nuStrtPos == parseFile.find("header = lheHeadParser( parseFile, initPos, endPos ); - // continue; - //} else if( nuStrtPos == parseFile.find("init = std::make_shared( parseFile, initPos ); - // initPos = *nodeStartFind( parseFile, endPos ); - // endPos = *nodeEndFind( parseFile, *nodeEndFind( parseFile, endPos + 1 ) + 1); - // continue; - //} if( parseFile.substr( initPos, 6 ) == "events.push_back( evPtrParsor( parseFile, initPos, endPos ) ); + currNode->getEvents().push_back( 
evPtrParsor( parseFile, initPos, endPos ) ); continue; } else if( parseFile.substr( initPos, 7 ) == "header = lheHeadParser( parseFile, initPos, endPos ); + currNode->setHeader(lheHeadParser( parseFile, initPos, endPos )); continue; } else if( parseFile.substr( initPos, 5 ) == "init = std::make_shared( parseFile, initPos ); - initPos = *nodeStartFind( parseFile, endPos ); - endPos = *nodeEndFind( parseFile, *nodeEndFind( parseFile, endPos + 1 ) + 1); + currNode->setInit( std::make_shared( parseFile, initPos ) ); + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, nodeEndFind( parseFile, endPos + 1 ) + 1); continue; } else { currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); @@ -1987,8 +2574,8 @@ namespace REX while( equalSign < nodeInitEnd ){ currNode->addTag( xmlTagParser(parseFile, equalSign) ); } - initPos = *nodeStartFind( parseFile, endPos ); - endPos = *nodeEndFind( parseFile, endPos + 1 ); + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, endPos + 1 ); return currNode; } @@ -2003,18 +2590,48 @@ namespace REX std::vector plusTwo; std::vector plusThree; std::vector minusNine; + std::vector orderMOne; + std::vector orderOne; + std::vector orderMTwo; + std::vector orderTwo; + std::vector orderThree; + std::vector orderNine; std::map> valVecs{{"-1", minusOne}, {"1", plusOne}, {"-2", minusTwo}, {"2", plusTwo}, {"3", plusThree}, {"-9", minusNine}}; + std::map> orderVecs{{"-1", orderMOne}, {"1", orderOne}, {"-2", orderMTwo}, {"2", orderTwo}, {"3", orderThree}, {"9",orderNine}}; lheProc( event& eventNode ) { for( auto prt : eventNode.getPrts() ) { valVecs[prt->getStatus()].push_back(prt->getPDG()); } + for( auto valVec = valVecs.begin() ; valVec!= valVecs.end() ; ++valVec ){ + if( valVec->second.size() == 0 ){ continue; } + orderVecs[valVec->first] = *stoiSort( valVec->second ); + } + } + std::shared_ptr writer(){ + auto written = std::make_shared(); + for( auto inits : valVecs["-1"] ){ + 
written->append(inits); + written->append(" "); + } + if( valVecs["2"].size() > 0 ){ + written->append("> "); + for( auto inits : valVecs["2"] ){ + written->append(inits); + written->append(" "); + } + } + written->append("> "); + for( auto inits : valVecs["1"] ){ + written->append(inits); + written->append(" "); + } + return written; } }; - // ZW: fcn for uploading text files - // to the program, pushing all characters to lowercase + // ZW: fcn for uploading text files to the program std::shared_ptr filePuller( const std::string& fileLoc ) { std::ifstream fileLoad( fileLoc ); @@ -2037,9 +2654,9 @@ namespace REX return true; } - // ZW: fcn for extracting the fill + // ZW: fcn for extracting the full // process information from an LHE event - std::shared_ptr>> pgdXtract( event& currEv, const std::vector& pdgVec ) + std::shared_ptr>> pdgXtract( event& currEv ) { auto currProc = std::make_shared>>(); auto &useProc = *currProc; @@ -2049,52 +2666,193 @@ namespace REX } return currProc; } + + template + bool chaoticVecComp( const std::vector& vec1, const std::vector order1, const std::vector& vec2, const std::vector order2 ) + { + if( vec1.size()!= vec2.size() ){ return false; } + for( size_t i = 0; i < vec1.size(); i++ ){ + if( vec1[order1[i]]!= vec2[order2[i]] ){ return false; } + } + return true; + } - // ZW: fcn for comparing two processes it the - // format output by pgdXtract + // ZW: fcn for comparing two processes in the + // format output by pdgXtract bool sameProcString( std::map>& firstVec, std::map>& secVec, const std::vector& pdgVec ) + std::vector>& secVec, const std::vector& statVec ) { if( firstVec.size() != secVec.size() ){return false;} - for(auto code : pdgVec ) + for(auto code : statVec ) { if( firstVec[code] != secVec[code] ){ return false; } } return true; } + bool sameProcString( std::map>& firstVec, std::map>& firstOrder, + std::map>& secVec, std::map>& secondOrder, + std::vector& statVec ) + { + if( firstVec.size() != secVec.size() ){return 
false;} + for(auto code : statVec ) + { + if( !chaoticVecComp(firstVec[code], firstOrder[code], secVec[code], secondOrder[code]) ){ return false; } + } + return true; + } + // ZW: fcn for processes in the lheProc struct format - bool procComp( const lheProc& firstProc, const lheProc& secProc, const std::vector& pdgVec ) + bool procComp( lheProc& firstProc, lheProc& secProc, std::vector statVec ) { - for( auto stat : pdgVec ) + for( auto stat : statVec ) { if( firstProc.valVecs.at(stat).size() != secProc.valVecs.at(stat).size() ){ return false; } - if( firstProc.valVecs.at(stat) != secProc.valVecs.at(stat) ){ return false; } + if( !chaoticVecComp( firstProc.valVecs[stat], firstProc.orderVecs[stat], secProc.valVecs[stat], secProc.orderVecs[stat] ) ){ return false; } + } + return true; + } + + bool evProcComp( event& firstEv, event& secEv, std::vector statVec = {"-1", "1"} ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc()[stat].size()!= secEv.getProc()[stat].size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc()[stat], firstEv.getProcOrder()[stat], + secEv.getProc()[stat], secEv.getProcOrder()[stat] ) ){ return false; } + } + return true; + } + + bool evProcComp( event& firstEv, event& secEv, std::vector statVec, + sortFcn sorter ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc(sorter)[stat].size()!= secEv.getProc(sorter)[stat].size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc(sorter)[stat], firstEv.getProcOrder(sorter)[stat], + secEv.getProc(sorter)[stat], secEv.getProcOrder(sorter)[stat] ) ){ return false; } } return true; } - // ZW: fcn for checking whether a list of pdgKtract format + bool evProcComp( event& firstEv, event& secEv, std::vector statVec, + statSort sorter ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc(sorter)[stat].size()!= secEv.getProc(sorter)[stat].size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc(sorter)[stat], firstEv.getProcOrder(sorter)[stat], + 
secEv.getProc(sorter)[stat], secEv.getProcOrder(sorter)[stat] ) ){ return false; } + } + return true; + } + + bool evProcComp( const event& firstEv, const event& secEv, std::vector statVec = {"-1", "1"} ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc().at(stat).size()!= secEv.getProc().at(stat).size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc().at(stat), firstEv.getProcOrder().at(stat), + secEv.getProc().at(stat), secEv.getProcOrder().at(stat) ) ){ return false; } + } + return true; + } + + bool evProcComp( const event& firstEv, const event& secEv, std::vector statVec, + sortFcn sorter ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc().at(stat).size()!= secEv.getProc().at(stat).size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc().at(stat), firstEv.getProcOrder().at(stat), + secEv.getProc().at(stat), secEv.getProcOrder().at(stat) ) ){ return false; } + } + return true; + } + + bool evProcComp( const event& firstEv, const event& secEv, std::vector statVec, + statSort sorter ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc().at(stat).size()!= secEv.getProc().at(stat).size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc().at(stat), firstEv.getProcOrder().at(stat), + secEv.getProc().at(stat), secEv.getProcOrder().at(stat) ) ){ return false; } + } + return true; + } + + struct eventComp{ + bool operator()( event& firstEv, event& secEv){ + if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getStatSort());} + else {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getSortFcn() );} + } + bool operator()( const event& firstEv, const event& secEv) const { + if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getStatSort());} + else {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getSortFcn() );} + } + bool operator()(event& firstEv, event& secEv, std::vector statVec){ + if( firstEv.isSpecSort() ) {return evProcComp( 
firstEv, secEv, statVec, firstEv.getStatSort());} + else {return evProcComp( firstEv, secEv, statVec, firstEv.getSortFcn() );} + } + }; + + // ZW: fcn for checking whether a list of pdgXtract format // processes sourceProcList contains a given process newProc bool procVecContains( std::vector>>>& sourceProcList, - std::map>& newProc, const std::vector& pdgVec ) - { - int noProcs = sourceProcList.size(); + std::map>& newProc, const std::vector& statVec ) + {\ for( auto proc : sourceProcList ) { - if( sameProcString( *proc, newProc, pdgVec ) ){ return true; } + if( sameProcString( *proc, newProc, statVec ) ){ return true; } } return false; } // ZW: fcn for checking whether a vector of lheProc structs // procList contains a given lheProc nuProc - bool procListComp( const std::vector>& procList, const lheProc& nuProc, const std::vector& pdgVec ) + bool procListComp( const std::vector>& procList, lheProc& nuProc, std::vector statVec ) { if( procList.size() != 0 ){ for(auto proc : procList ) { - if( procComp( *proc, nuProc, pdgVec ) ){ return true; } + if( procComp( *proc, nuProc, statVec ) ){ return true; } + } + } + return false; + } + + bool evProcListComp( std::vector>& procList, event& nuEv, std::vector statVec ) + { + if( procList.size()!= 0 ){ + for( auto ev : procList ) + { + if( evProcComp( *ev, nuEv, statVec ) ){ return true; } + } + } + return false; + } + + bool evProcListComp( std::vector>& procList, event& nuEv, std::vector statVec, + sortFcn sorter ) + { + if( procList.size()!= 0 ){ + for( auto ev : procList ) + { + if( evProcComp( *ev, nuEv, statVec, sorter ) ){ return true; } + } + } + return false; + } + + bool evProcListComp( std::vector>& procList, event& nuEv, std::vector statVec, + statSort sorter ) + { + if( procList.size()!= 0 ){ + for( auto ev : procList ) + { + if( evProcComp( *ev, nuEv, statVec, sorter ) ){ return true; } } } return false; @@ -2102,42 +2860,85 @@ namespace REX // ZW: fcn for extracting the different processes // in a given REX 
format LHE file in the pdgXtract format - std::vector>>> procExtractor( const lheNode& lheFile ) + std::vector>>> procExtractor( lheNode& lheFile ) { std::vector>>> procList; - const static std::vector pdgVec = { "-1", "1", "-2", "2", "3", "-9" }; - for( auto event : lheFile.events ) + const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + for( auto event : lheFile.getEvents() ) { - auto currProc = pgdXtract( *event, pdgVec ); - if( procVecContains( procList, *currProc, pdgVec ) ){ continue; } + auto currProc = pdgXtract( *event ); + if( procVecContains( procList, *currProc, statVec ) ){ continue; } procList.push_back(currProc); } return procList; } - // ZW: fcn for extracting the differenty processes + // ZW: fcn for extracting the different processes // in a given REX format LHE file in the lheProc format - std::vector> processPull( const lheNode& lheFile ) + std::vector> processPull( lheNode& lheFile, + std::vector statVec = { "-1", "1" } ) { - const static std::vector pdgVec = { "-1", "1", "-2", "2", "3", "-9" }; + //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; std::vector> procsList{}; - for( auto event : lheFile.events ) + for( auto event : lheFile.getEvents() ) { auto currProc = std::make_shared( *event ); - if( procListComp( procsList, *currProc, pdgVec ) ){ continue; } + if( procListComp( procsList, *currProc, statVec ) ){ continue; } procsList.push_back( currProc ); } return procsList; } + std::vector> evProcessPull( lheNode& lheFile, std::vector statVec = { "-1", "1" } ) + { + //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector> procsList{}; + for( auto currEv : lheFile.getEvents() ) + { + if( evProcListComp( procsList, *currEv, statVec ) ){ continue; } + procsList.push_back( currEv ); + } + return procsList; + } + + std::vector> evProcessPull( lheNode& lheFile, + sortFcn sorter, + std::vector statVec = { "-1", "1" }) + { + //const static std::vector statVec = { "-1", "1", 
"-2", "2", "3", "-9" }; + std::vector> procsList{}; + lheFile.setSameSort(sorter); + for( auto currEv : lheFile.getEvents() ) + { + if( evProcListComp( procsList, *currEv, statVec, sorter ) ){ continue; } + procsList.push_back( currEv ); + } + return procsList; + } + + std::vector> evProcessPull( lheNode& lheFile, + statSort sorter, + std::vector statVec = { "-1", "1" }) + { + //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector> procsList{}; + lheFile.setStatSort(sorter); + for( auto currEv : lheFile.getEvents() ) + { + if( evProcListComp( procsList, *currEv, statVec, sorter ) ){ continue; } + procsList.push_back( currEv ); + } + return procsList; + } + // ZW: fcn for keeping track of subprocess ordering // in LHE file - int procPos( const std::vector>& evtSet, lheProc& currProc, - const std::vector& pdgVec ) + size_t procPos( const std::vector>& evtSet, lheProc& currProc, + std::vector& statVec ) { - for( auto k = 0 ; k < evtSet.size() ; ++k ) + for( size_t k = 0 ; k < evtSet.size() ; ++k ) { - for( auto stat : pdgVec ) + for( auto stat : statVec ) { if( evtSet[k]->valVecs[stat] != currProc.valVecs[stat] ){ break; } } @@ -2146,48 +2947,210 @@ namespace REX return evtSet.size(); } + size_t evProcPos( const std::vector>& evtSet, event& currEv, + std::vector statVec = { "-1", "1" } ) + { + for( size_t k = 0 ; k < evtSet.size() ; ++k ) + { + if( evProcComp(*evtSet[k], currEv, statVec) ){ return k; } + } + return evtSet.size(); + } + + size_t evProcPos( const std::vector>& evtSet, event& currEv, + sortFcn sorter, std::vector statVec = {"-1", "1"} ) + { + for( size_t k = 0 ; k < evtSet.size() ; ++k ) + { + if( evProcComp(*evtSet[k], currEv, statVec, sorter) ){ return k; } + } + return evtSet.size(); + } + + size_t evProcPos( const std::vector>& evtSet, event& currEv, + statSort sorter, std::vector statVec = {"-1", "1"} ) + { + for( size_t k = 0 ; k < evtSet.size() ; ++k ) + { + if( evProcComp(*evtSet[k], currEv, statVec, sorter) ){ 
return k; } + } + return evtSet.size(); + } + // ZW: fcn for extracting the subprocess ordering // of LHE file - std::vector>> procOrder( const lheNode& lheFile, const std::vector>& evtSet ) + std::vector>> procOrder( lheNode& lheFile, const std::vector>& evtSet, + std::vector statVec = { "-1", "1" } ) { - const static std::vector pdgVec = { "-1", "1", "-2", "2", "3", "-9" }; - std::vector>> eventBools( evtSet.size()); - std::vector> pracBools( evtSet.size(), std::vector ( lheFile.events.size() )); - for( auto boolSets : pracBools ){ - std::fill( boolSets.begin(), boolSets.end(), false ); + //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector>> eventBools( evtSet.size(), std::make_shared> ( lheFile.getEvents().size() )); + //std::vector> pracBools( evtSet.size(), std::vector ( lheFile.getEvents().size() )); + for( auto boolSets : eventBools ){ + std::fill( boolSets->begin(), boolSets->end(), false ); } - for( auto k = 0 ; k < lheFile.events.size() ; ++k ) + for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) { - auto currProc = lheProc(*lheFile.events[k]); - pracBools[ procPos(evtSet, currProc, pdgVec) ][ k ] = true; + auto currProc = lheProc(*lheFile.getEvents()[k]); + eventBools[ procPos(evtSet, currProc, statVec) ]->at( k ) = true; } - for( int k = 0 ; k < eventBools.size() ; ++k ) + //for( size_t k = 0 ; k < eventBools.size() ; ++k ) + //{ + // eventBools[k] = std::make_shared>( pracBools[k] ); + //} + return eventBools; + } + + std::vector>> evProcOrder( lheNode& lheFile, const std::vector>& evtSet, + std::vector statVec = { "-1", "1" } ) + { + std::vector>> eventBools; + eventBools.reserve(evtSet.size()); + for (size_t i = 0; i < evtSet.size(); ++i) { + eventBools.push_back(std::make_shared>(lheFile.getEvents().size(), false)); + } + for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) { - eventBools[k] = std::make_shared>( pracBools[k] ); + eventBools[ evProcPos(evtSet, *lheFile.getEvents()[k], statVec) ]->at( 
k ) = true; } - return eventBools; + return eventBools; + } + + std::vector>> evProcOrder( lheNode& lheFile, const std::vector>& evtSet, + sortFcn sorter, + std::vector statVec = { "-1", "1" } ) + { + std::vector>> eventBools; + eventBools.reserve(evtSet.size()); + for (size_t i = 0; i < evtSet.size(); ++i) { + eventBools.push_back(std::make_shared>(lheFile.getEvents().size(), false)); + } + for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) + { + eventBools[ evProcPos(evtSet, *lheFile.getEvents()[k], sorter, statVec) ]->at( k ) = true; + } + return eventBools; + } + + std::vector>> evProcOrder( lheNode& lheFile, const std::vector>& evtSet, + statSort sorter, + std::vector statVec = { "-1", "1" } ) + { + std::vector>> eventBools; + eventBools.reserve(evtSet.size()); + for (size_t i = 0; i < evtSet.size(); ++i) { + eventBools.push_back(std::make_shared>(lheFile.getEvents().size(), false)); + } + for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) + { + eventBools[ evProcPos(evtSet, *lheFile.getEvents()[k], sorter, statVec) ]->at( k ) = true; + } + return eventBools; } // ZW: fcn for reordering LHE file based on subprocess - std::shared_ptr>> eventReOrder( const lheNode& lheFile, std::vector relProc ) + std::shared_ptr>> eventReOrder( lheNode& lheFile, std::vector relProc ) { auto reOrdered = std::make_shared>>(); reOrdered->reserve( std::count( relProc.begin(), relProc.end(), true ) ); - for( int k = 0 ; k < relProc.size() ; ++k ) + for( size_t k = 0 ; k < relProc.size() ; ++k ) { if(!relProc[k]){continue;} - reOrdered->push_back( lheFile.events[k] ); + reOrdered->push_back( lheFile.getEvents()[k] ); } return reOrdered; } // ZW: wrapper for eventReOrder - std::vector>>> lheReOrder( const lheNode& lheFile ) + std::vector>>> lheReOrder( lheNode& lheFile, + std::vector statVec = { "-1", "1" } ) + { + auto procSets = processPull( lheFile, statVec ); + auto relProcs = procOrder( lheFile, procSets, statVec ); + std::vector>>> ordProcs(procSets.size()); + 
for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + std::vector statVec = { "-1", "1" } ) + { + auto procSets = evProcessPull( lheFile, statVec ); + auto relProcs = evProcOrder( lheFile, procSets, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + std::vector> procSets, std::vector>> relProcs, + std::vector statVec = { "-1", "1" } ) + { + //auto procSets = evProcessPull( lheFile, statVec ); + //auto relProcs = evProcOrder( lheFile, procSets, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + sortFcn sorter, + std::vector statVec = { "-1", "1" } ) + { + auto procSets = evProcessPull( lheFile, sorter, statVec ); + auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + std::vector> procSets, std::vector>> relProcs, + sortFcn sorter, std::vector statVec = { "-1", "1" } ) + { + //auto procSets = evProcessPull( lheFile, sorter, statVec ); + //auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + statSort sorter, + std::vector statVec = { "-1", "1" } ) + { + auto procSets = 
evProcessPull( lheFile, sorter, statVec ); + auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + std::vector> procSets, std::vector>> relProcs, + statSort sorter, std::vector statVec = { "-1", "1" } ) { - auto procSets = processPull( lheFile ); - auto relProcs = procOrder( lheFile, procSets ); + //auto procSets = evProcessPull( lheFile, sorter, statVec ); + //auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); std::vector>>> ordProcs(procSets.size()); - for( int k = 0 ; k < relProcs.size() ; ++k ) + for( size_t k = 0 ; k < relProcs.size() ; ++k ) { ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); } @@ -2202,10 +3165,11 @@ namespace REX std::vector aQEDs; std::vector aQCDs; std::vector nprts; + std::vector relNPrts; std::vector procIDs; evtInfo( const std::vector>& lheFile = {} ){ int nEvt = lheFile.size(); - wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); procIDs.reserve(nEvt); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); nprts.reserve(nEvt); procIDs.reserve(nEvt); for( auto evt : lheFile ) { wgts.push_back(evt->getHead().getWeight()); @@ -2216,6 +3180,53 @@ namespace REX procIDs.push_back(evt->getHead().getProcID()); } } + evtInfo( const std::vector>& lheFile, const std::vector& statVec ){ + int nEvt = lheFile.size(); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); + for( auto evt : lheFile ) + { + wgts.push_back(evt->getHead().getWeight()); + scales.push_back(evt->getHead().getScale()); + aQEDs.push_back(evt->getHead().getAQED()); + aQCDs.push_back(evt->getHead().getAQCD()); + size_t nPrt = 0; + for( auto stat : statVec ){ nPrt += 
evt->getProc()[stat].size(); } + relNPrts.push_back(nPrt); + procIDs.push_back(evt->getHead().getProcID()); + } + } + evtInfo( const std::vector>& lheFile, const std::vector& statVec, + sortFcn sorter ){ + int nEvt = lheFile.size(); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); + for( auto evt : lheFile ) + { + wgts.push_back(evt->getHead().getWeight()); + scales.push_back(evt->getHead().getScale()); + aQEDs.push_back(evt->getHead().getAQED()); + aQCDs.push_back(evt->getHead().getAQCD()); + size_t nPrt = 0; + for( auto stat : statVec ){ nPrt += evt->getProc(sorter)[stat].size(); } + relNPrts.push_back(nPrt); + procIDs.push_back(evt->getHead().getProcID()); + } + } + evtInfo( const std::vector>& lheFile, const std::vector& statVec, + statSort sorter ){ + int nEvt = lheFile.size(); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); + for( auto evt : lheFile ) + { + wgts.push_back(evt->getHead().getWeight()); + scales.push_back(evt->getHead().getScale()); + aQEDs.push_back(evt->getHead().getAQED()); + aQCDs.push_back(evt->getHead().getAQCD()); + size_t nPrt = 0; + for( auto stat : statVec ){ nPrt += evt->getProc(sorter)[stat].size(); } + relNPrts.push_back(nPrt); + procIDs.push_back(evt->getHead().getProcID()); + } + } }; // ZW: transposed particle information struct @@ -2243,7 +3254,7 @@ namespace REX spins.push_back( prt->getSpin() ); statuses.push_back( prt->getStatus() ); pdgs.push_back( prt->getPDG() ); - for( int k = 0 ; k < 2 ; ++k ) + for( size_t k = 0 ; k < 2 ; ++k ) { moms.push_back( prt->getMom()[k] ); mothers.push_back( prt->getMothers()[k] ); @@ -2253,16 +3264,123 @@ namespace REX } } } - }; - - // ZW: transposed LHE file with a single process type - struct transMonoLHE { - public: - evtInfo evtsHead; - prtInfo evtsData; - transMonoLHE( const std::vector>& lheFile = {}, const int nPrt = 8 
){ + prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ + int nEvt = lheFile.size(); + moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); + spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); + for( auto evt : lheFile ) + { + for( auto stat : statVec ) + { + for( auto i : evt->getProcOrder()[stat] ) + { + auto prt = evt->getPrts()[i]; + moms.push_back( prt->getE() ); + masses.push_back( prt->getMass() ); + vtims.push_back( prt->getVTim() ); + spins.push_back( prt->getSpin() ); + statuses.push_back( prt->getStatus() ); + pdgs.push_back( prt->getPDG() ); + for( size_t k = 0 ; k < 2 ; ++k ) + { + moms.push_back( prt->getMom()[k] ); + mothers.push_back( prt->getMothers()[k] ); + icols.push_back( prt->getColor()[k] ); + } + moms.push_back( prt->getMom()[2] ); + } + } + } + } + prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + sortFcn sorter ){ + int nEvt = lheFile.size(); + moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); + spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); + for( auto evt : lheFile ) + { + for( auto stat : statVec ) + { + for( auto i : evt->getProcOrder(sorter)[stat] ) + { + auto prt = evt->getPrts()[i]; + moms.push_back( prt->getE() ); + masses.push_back( prt->getMass() ); + vtims.push_back( prt->getVTim() ); + spins.push_back( prt->getSpin() ); + statuses.push_back( prt->getStatus() ); + pdgs.push_back( prt->getPDG() ); + for( size_t k = 0 ; k < 2 ; ++k ) + { + moms.push_back( prt->getMom()[k] ); + mothers.push_back( prt->getMothers()[k] ); + icols.push_back( prt->getColor()[k] ); + } + moms.push_back( prt->getMom()[2] ); + } + } + } + } + prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + statSort sorter ){ + int nEvt = 
lheFile.size(); + moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); + spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); + for( auto evt : lheFile ) + { + for( auto stat : statVec ) + { + for( auto i : evt->getProcOrder(sorter)[stat] ) + { + auto prt = evt->getPrts()[i]; + moms.push_back( prt->getE() ); + masses.push_back( prt->getMass() ); + vtims.push_back( prt->getVTim() ); + spins.push_back( prt->getSpin() ); + statuses.push_back( prt->getStatus() ); + pdgs.push_back( prt->getPDG() ); + for( size_t k = 0 ; k < 2 ; ++k ) + { + moms.push_back( prt->getMom()[k] ); + mothers.push_back( prt->getMothers()[k] ); + icols.push_back( prt->getColor()[k] ); + } + moms.push_back( prt->getMom()[2] ); + } + } + } + } + }; + + // ZW: transposed LHE file with a single process type + struct transMonoLHE { + public: + evtInfo evtsHead; + prtInfo evtsData; + std::shared_ptr process; + transMonoLHE( const std::vector>& lheFile = {}, const int nPrt = 8 ){ evtsHead = evtInfo(lheFile); evtsData = prtInfo(lheFile, nPrt); + process = lheFile[0]; + } + transMonoLHE( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ + evtsHead = evtInfo(lheFile, statVec); + evtsData = prtInfo(lheFile, nPrt, statVec); + process = lheFile[0]; + } + transMonoLHE( const std::vector>& lheFile, const int nPrt, + sortFcn sorter, + std::vector statVec = { "-1", "1" } ){ + evtsHead = evtInfo(lheFile, statVec); + evtsData = prtInfo(lheFile, nPrt, statVec, sorter); + process = lheFile[0]; + } + transMonoLHE( const std::vector>& lheFile, const int nPrt, + statSort sorter, + std::vector statVec = { "-1", "1" } ){ + evtsHead = evtInfo(lheFile, statVec); + evtsData = prtInfo(lheFile, nPrt, statVec, sorter); + process = lheFile[0]; } }; @@ -2271,16 +3389,82 @@ namespace REX public: std::string_view xmlFile; std::vector> subProcs; + std::vector> procSets; + std::vector>> relProcs; + 
transLHE(){ return; } transLHE( lheNode& lheFile ) { + procSets = evProcessPull( lheFile ); + relProcs = evProcOrder( lheFile, procSets ); xmlFile = lheFile.getFile(); - auto procsOrdered = lheReOrder( lheFile ); + auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs ); subProcs = std::vector>( procsOrdered.size() ); - for( int k = 0 ; k < procsOrdered.size() ; ++k ) + for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) { subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt() ); } } + transLHE( lheNode& lheFile, + sortFcn sorter, + const std::vector& statVec = { "-1", "1" } ) + { + procSets = evProcessPull( lheFile, sorter, statVec ); + relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + xmlFile = lheFile.getFile(); + auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, sorter, statVec ); + subProcs = std::vector>( procsOrdered.size() ); + for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) + { + subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), sorter, statVec ); + } + } + transLHE( lheNode& lheFile, + statSort sorter, + const std::vector& statVec = { "-1", "1" } ) + { + procSets = evProcessPull( lheFile, sorter, statVec ); + relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + xmlFile = lheFile.getFile(); + auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, sorter, statVec ); + subProcs = std::vector>( procsOrdered.size() ); + for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) + { + subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), sorter, statVec ); + } + } + transLHE( lheNode& lheFile, const std::vector& statVec ) + { + procSets = evProcessPull( lheFile, statVec ); + relProcs = evProcOrder( lheFile, procSets, statVec ); + xmlFile = lheFile.getFile(); + auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, statVec ); + subProcs = std::vector>( procsOrdered.size() ); + for( size_t k = 0 ; k < 
procsOrdered.size() ; ++k ) + { + subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), statVec ); + } + } + template + std::shared_ptr> vectorFlat( std::vector>> vecVec ) + { + if( vecVec.size() == relProcs.size() ) continue; + else throw std::range_error("vectorFlat: input vector size does not match number of subprocesses"); + for( size_t k = 0 ; k < vecVec.size() ; ++k){ + if( vecVec[k]->size() == relProcs[k]->size() ) continue; + else throw std::range_error("vectorFlat: input vector size does not match number of events for subprocess"); + } + auto flatVec = std::make_shared>(relProcs[0]->size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ){ + currInd = 0; + for( size_t j = 0 ; j < relProcs[k]->size() ; ++j ){ + if( relProcs[k]->at(j) ){ + flatVec->at(currInd) = vecVec[k]->at(currInd); + ++currInd; + } + } + } + return flatVec; + } }; // ZW: vector transformation string_to_double @@ -2373,32 +3557,247 @@ namespace REX auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); std::vector>> &lheDs = *lheDos; int currInd = 0; - if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.init->getHead()->ebmup[0], lheFile.init->getHead()->ebmup[1] } ); ++currInd; } + if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } + if( boolVec[1] ){ + std::vector xsecVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xsecVec.push_back(line->xsecup); + } + lheDs[currInd] = vecStoD( xsecVec ); + ++currInd; } + if( boolVec[2] ){ + std::vector xerrVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xerrVec.push_back(line->xerrup); + } + lheDs[currInd] = vecStoD( xerrVec ); + ++currInd; } + if( boolVec[3] ){ + std::vector xmaxVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + 
xmaxVec.push_back(line->xmaxup); + } + lheDs[currInd] = vecStoD( xmaxVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. * M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + + return lheDos; + } + + std::shared_ptr>>> lheValDoubles(transLHE& lheAOS, lheRetDs vals = lheRetDs() ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + //auto lheAOS = transLHE( lheFile ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( 
lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. * M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + return lheDos; + } + + std::shared_ptr>>> lheValDoubles( lheNode& lheFile, + const std::vector& statVec, lheRetDs vals = lheRetDs() ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, statVec ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } if( boolVec[1] ){ - std::vector xsecVec( lheFile.init->getLines().size() ); - for( auto line : lheFile.init->getLines() ) + std::vector xsecVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) { xsecVec.push_back(line->xsecup); } lheDs[currInd] = vecStoD( xsecVec ); ++currInd; } if( boolVec[2] ){ - std::vector xerrVec( lheFile.init->getLines().size() ); - for( auto line : lheFile.init->getLines() ) + std::vector xerrVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) { xerrVec.push_back(line->xerrup); } lheDs[currInd] = vecStoD( xerrVec ); ++currInd; } if( boolVec[3] ){ - std::vector xmaxVec( 
lheFile.init->getLines().size() ); - for( auto line : lheFile.init->getLines() ) + std::vector xmaxVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) { xmaxVec.push_back(line->xmaxup); } lheDs[currInd] = vecStoD( xmaxVec ); ++currInd; } - for( int k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. * M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + + return lheDos; + } + + std::shared_ptr>>> lheValDoubles( lheNode& lheFile, + sortFcn sorter, + const std::vector& statVec = {"-1", "1"}, lheRetDs vals = lheRetDs() ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, sorter, statVec ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoD( { 
lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } + if( boolVec[1] ){ + std::vector xsecVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xsecVec.push_back(line->xsecup); + } + lheDs[currInd] = vecStoD( xsecVec ); + ++currInd; } + if( boolVec[2] ){ + std::vector xerrVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xerrVec.push_back(line->xerrup); + } + lheDs[currInd] = vecStoD( xerrVec ); + ++currInd; } + if( boolVec[3] ){ + std::vector xmaxVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xmaxVec.push_back(line->xmaxup); + } + lheDs[currInd] = vecStoD( xmaxVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. 
* M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + + return lheDos; + } + + std::shared_ptr>>> lheValDoubles( lheNode& lheFile, + statSort sorter, + const std::vector& statVec = {"-1", "1"}, lheRetDs vals = lheRetDs() ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, sorter, statVec ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } + if( boolVec[1] ){ + std::vector xsecVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xsecVec.push_back(line->xsecup); + } + lheDs[currInd] = vecStoD( xsecVec ); + ++currInd; } + if( boolVec[2] ){ + std::vector xerrVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xerrVec.push_back(line->xerrup); + } + lheDs[currInd] = vecStoD( xerrVec ); + ++currInd; } + if( boolVec[3] ){ + std::vector xmaxVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xmaxVec.push_back(line->xmaxup); + } + lheDs[currInd] = vecStoD( xmaxVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) { if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } if( boolVec[5] 
){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } @@ -2432,20 +3831,20 @@ namespace REX auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); std::vector>> &lheDs = *lheIs; int currInd = 0; - if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->idbmup[0], lheFile.init->getHead()->idbmup[1] } ); ++currInd; } - if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->pdfgup[0], lheFile.init->getHead()->pdfgup[1] } ); ++currInd; } - if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->pdfsup[0], lheFile.init->getHead()->pdfsup[1] } ); ++currInd; } - if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->idwtup } ); ++currInd; } - if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->nprup } ); ++currInd; } + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } if( boolVec[5] ){ - std::vector lprVec( lheFile.init->getLines().size() ); - for( auto line : lheFile.init->getLines() ) + std::vector lprVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) { lprVec.push_back(line->lprup); } lheDs[currInd] = vecStoI( lprVec ); ++currInd; } - for( int k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) { if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); 
++currInd; } if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } @@ -2456,4 +3855,110 @@ namespace REX } return lheIs; } -} \ No newline at end of file + + std::shared_ptr>>> lheValInts( lheNode& lheFile, std::vector statVec, + lheRetInts vals = lheRetInts() ) + { + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, statVec ); + auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheIs; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } + if( boolVec[5] ){ + std::vector lprVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + lprVec.push_back(line->lprup); + } + lheDs[currInd] = vecStoI( lprVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } + if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); 
++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } + } + return lheIs; + } + + std::shared_ptr>>> lheValInts( lheNode& lheFile, + sortFcn sorter, + std::vector statVec = {"-1", "1"}, lheRetInts vals = lheRetInts() ) + { + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, sorter, statVec ); + auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheIs; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } + if( boolVec[5] ){ + std::vector lprVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + lprVec.push_back(line->lprup); + } + lheDs[currInd] = vecStoI( lprVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } + if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } + if( 
boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } + } + return lheIs; + } + + std::shared_ptr>>> lheValInts( lheNode& lheFile, + statSort sorter, + std::vector statVec = {"-1", "1"}, lheRetInts vals = lheRetInts() ) + { + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, sorter, statVec ); + auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheIs; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } + if( boolVec[5] ){ + std::vector lprVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + lprVec.push_back(line->lprup); + } + lheDs[currInd] = vecStoI( lprVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } + if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } + if( boolVec[11] ){ 
lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } + } + return lheIs; + } +} + +#endif \ No newline at end of file diff --git a/tools/REX/rwgt_driver.cc b/tools/REX/rwgt_driver.cc new file mode 100644 index 0000000000..ee74f097f4 --- /dev/null +++ b/tools/REX/rwgt_driver.cc @@ -0,0 +1,115 @@ +//========================================================================== +// Copyright (C) 2023-2024 CERN +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Written by: Z. Wettersten (Jan 2024) for the MG5aMC CUDACPP plugin. +//========================================================================== +//========================================================================== +// This file has been automatically generated for C++ Standalone by +//%(info_lines)s +//========================================================================== +//========================================================================== +// Driver for reweighting events for processes +//%(multiprocess_lines)s +//-------------------------------------------------------------------------- + +#include "rwgt_instance.h" +#include +#include +//%(include_lines)s + +int usage( char* argv0, int ret = 1 ) +{ + std::cout << "Usage: " << argv0 + << " [--lhefile=\"/YOUR/PATH/HERE\"|-lhe=\"/YOUR/PATH/HERE\"] [--rwgtcard=/YOUR/PATH/HERE|-rwgt=\"/YOUR/PATH/HERE\"]\n" + << "[--output=/YOUR/PATH/HERE\"|-out=\"/YOUR/PATH/HERE\"]\n" << "[--param_card=/YOUR/PATH/HERE\"|-slha=\"/YOUR/PATH/HERE\"]\n"; + std::cout << "\n"; + std::cout << "The LHE file path should be with respect to the directory you are running\n"; + std::cout << "this program from, and similarly the rwgt_card should be as well.\n"; + return ret; +} + + +int main( int argc, char** argv ){ + std::string lheFilePath; + std::string rwgtCardPath; + std::string outputPath; + std::string slhaPath; + + if (argc < 2){ + return usage( argv[0] ); + } + + // READ COMMAND LINE ARGUMENTS + for( int i = 1; 
i < argc; i++ )
+    {
+        auto currArg = std::string( argv[i] );
+        if( currArg.substr(0,9) == "--lhefile" || currArg.substr(0,4) == "-lhe" )
+        {
+            lheFilePath = currArg.substr( currArg.find( "=" ) + 1 );
+        }
+        else if( currArg.substr(0,10) == "--rwgtcard" || currArg.substr(0,5) == "-rwgt" )
+        {
+            rwgtCardPath = currArg.substr( currArg.find( "=" ) + 1 );
+        } else if( currArg.substr(0,8) == "--output" || currArg.substr(0,4) == "-out" ){
+            outputPath = currArg.substr( currArg.find( "=" ) + 1 );
+        } else if (currArg.substr(0,12) == "--param_card" || currArg.substr(0,5) == "-slha" ){
+            slhaPath = currArg.substr( currArg.find( "=" ) + 1 );
+        }
+        else {
+            return usage( argv[0] );
+        }
+    }
+
+    if( lheFilePath.empty() || rwgtCardPath.empty() ){
+        return usage( argv[0] );
+    }
+
+    std::string currPath = argv[0];
+
+    size_t slashPos = currPath.find_last_of( "/" );
+    bool onWindows = false;
+    if( slashPos == std::string::npos ){ slashPos = currPath.find_last_of( "\\" ); onWindows = true; }
+    if( slashPos == std::string::npos )
+        throw std::runtime_error( "Failed to determine current working directory -- need to know where program is run from to identify where to pull and push param_card.dat."
); + + if( slhaPath.empty() ){ + if( onWindows ){ + if( currPath.substr( currPath.find_last_of("\\", slashPos - 1) + 1, 2 ) == "P1" ){ + slhaPath = "..\\..\\Cards\\param_card.dat"; + } else{ + slhaPath = "\\Cards\\param_card.dat"; + } + } else { + if( currPath.substr( currPath.find_last_of("/", slashPos - 1) + 1, 2 ) == "P1" ){ + slhaPath = "../../Cards/param_card.dat"; + } else { + slhaPath = "/Cards/param_card.dat"; + } + }} + + + // ZW : include rwgt_instances(s) +//%(rwgt_runners)s + +// std::vector runSet = {%(run_set)s}; + std::vector runSet; + REX::teaw::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); + fileCol.initCards(); + + REX::teaw::ampCall subProcSet; + + for( auto proc : runSet ){ + subProcSet.insert( REX::teaw::ampPair( proc.procEvent, proc.bridgeCall ) ); + } + + //auto bridgeCont = fbridgeRunner( fileCol.getLhe() ); + + //std::function>( std::vector&, std::vector& )> scatteringAmplitude = bridgeCont.scatAmp; + REX::teaw::rwgtRunner driver( fileCol, subProcSet ); + + + driver.runRwgt( outputPath ); + + return 0; + +} \ No newline at end of file diff --git a/tools/REX/rwgt_instance.h b/tools/REX/rwgt_instance.h new file mode 100644 index 0000000000..374810a1aa --- /dev/null +++ b/tools/REX/rwgt_instance.h @@ -0,0 +1,69 @@ +//========================================================================== +// Copyright (C) 2023-2024 CERN +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Written by: Z. Wettersten (Jan 2024) for the MG5aMC CUDACPP plugin. +//========================================================================== +//========================================================================== +// Library including generic functions and classes for event reweighting. +// Process-specific rwgt_runner files are generated by mg5amc@nlo and use +// this library, while the rwgt_driver file is a wrapping program that +// calls the process-specific runners for given subprocesses. 
+//========================================================================== + +#ifndef _RWGT_INSTANCE_H_ +#define _RWGT_INSTANCE_H_ + +#include "teawREX.hpp" + +namespace rwgt{ + + //ZW: Function for calculating the number of remaining events in a warp + // in order to pad the input arrays to a multiple of the warp size + unsigned int warpRemain( unsigned int nEvt, unsigned int nWarp = 32 ){ + return (nWarp - ( nEvt % nWarp )) % nWarp; + } + + //ZW: Function for padding the input arrays to a multiple of the warp size + template + std::shared_ptr> warpPad( std::vector& input, unsigned int nWarp = 32 ){ + auto nEvt = input->size(); + auto nWarpRemain = warpRemain( nEvt, nWarp ); + auto fauxNEvt = nEvt + nWarpRemain; + auto output = std::make_shared>( fauxNEvt ); + std::copy( input.begin(), input.end(), output->begin()); + return output; + } + + struct instance{ + std::vector> procEvent; + REX::event process; + REX::teaw::amplitude bridgeCall; + instance(){} + instance( std::vector>& event){ + this->procEvent = event; + this->process = REX::event( event ); + } + instance( std::vector>& event, REX::teaw::amplitude& amp ){ + this->procEvent = event; + this->process = REX::event( event ); + bridgeCall = amp; + } + void setProc( std::vector>& event ){ + this->procEvent = event; + this->process = REX::event( event ); + } + void setAmp( REX::teaw::amplitude& amp ){ + bridgeCall = amp; + } + std::shared_ptr> ampEval( std::vector& momenta, std::vector& alphaS ){ + return bridgeCall( momenta, alphaS ); + } + std::shared_ptr> ampEval( std::shared_ptr> momenta, + std::shared_ptr> alphaS ){ + return bridgeCall( *momenta, *alphaS ); + } + }; + +} + +#endif \ No newline at end of file diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc new file mode 100644 index 0000000000..8af00324a6 --- /dev/null +++ b/tools/REX/rwgt_runner.cc @@ -0,0 +1,134 @@ +//========================================================================== +// Copyright (C) 2023-2024 CERN +// 
Licensed under the GNU Lesser General Public License (version 3 or later). +// Written by: Z. Wettersten (Jan 2024) for the MG5aMC CUDACPP plugin. +//========================================================================== +//========================================================================== +// This file has been automatically generated for the CUDACPP plugin by +%(info_lines)s +//========================================================================== +//========================================================================== +// A class for reweighting matrix elements for +%(process_lines)s +//-------------------------------------------------------------------------- + +#include "teawREX.hpp" +#include "rwgt_instance.h" +#include "fbridge.cc" + +// ZW: SET UP NAMESPACE +namespace %(process_namespace)s{ +//namespace dummy{ + + struct fbridgeRunner{ + std::vector rndHel; + std::vector rndCol; + std::vector selHel; + std::vector selCol; + CppObjectInFortran *fBridge; + const unsigned int chanId = 0; + const int nMom = 4; + int nWarpRemain; + int nEvt; + int fauxNEvt; + int nPar; + bool setup = false; + fbridgeRunner(){} + fbridgeRunner( REX::event& process ){ + nPar = process.getPrts().size(); + } + void runnerSetup( unsigned int& noEvts, unsigned int warpSize = 32){ + if( setup ){ return; } + nEvt = noEvts; + nWarpRemain = rwgt::warpRemain( nEvt, warpSize ); + fauxNEvt = nEvt + nWarpRemain; + rndHel = std::vector( fauxNEvt, 0. ); + rndCol = std::vector( fauxNEvt, 0. ); + selHel = std::vector( fauxNEvt, 0 ); + selCol = std::vector( fauxNEvt, 0 ); + setup = true; + } + void runnerSetup( std::vector& evVec, unsigned int warpSize = 32){ + if( setup ){ return; } + nEvt = evVec.size(); + nWarpRemain = rwgt::warpRemain( nEvt, warpSize ); + fauxNEvt = nEvt + nWarpRemain; + rndHel = std::vector( fauxNEvt, 0. ); + rndCol = std::vector( fauxNEvt, 0. 
); + selHel = std::vector( fauxNEvt, 0 ); + selCol = std::vector( fauxNEvt, 0 ); + setup = true; + } + void runnerSetup( std::shared_ptr> evVec, unsigned int warpSize = 32){ + if( setup ){ return; } + runnerSetup( *evVec, warpSize ); + } + std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ + runnerSetup( alphaS ); + for( size_t k = 0 ; k < nWarpRemain ; ++k ){ + alphaS.push_back( 0. ); + for( size_t k = 0 ; k < nMom * nPar ; ++k ){ + momenta.push_back( 0. ); + } + } + auto evalScatAmps = std::make_shared>( fauxNEvt ); + fbridgecreate_( &fBridge, &fauxNEvt, &nPar, &nMom ); + fbridgesequence_( &fBridge, &momenta.at(0), &alphaS.at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgedelete_( &fBridge ); + alphaS.resize( nEvt ); + momenta.resize( nEvt * nPar * nMom ); + evalScatAmps->resize( nEvt ); + return evalScatAmps; + } + std::shared_ptr> scatAmp( std::shared_ptr> momenta, std::shared_ptr> alphaS ){ + return scatAmp( *momenta, *alphaS ); + } +#if defined MGONGPU_FPTYPE_FLOAT + std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ + auto nuMom = std::vector( nEvt ); + auto nuAlphaS = std::vector( nEvt ); + std::transform( momenta.begin(), momenta.end(), nuMom.begin(), [](double mom){ return static_cast(mom); }) + std::transform( alphaS.begin(), alphaS.end(), nuAlphaS.begin(), [](double gs){ return static_cast(gs); }); + return scatAmp( nuMom, nuAlphaS ); + } +#endif + }; + + std::shared_ptr> thisProcSort( std::string_view& status, std::vector& arguments ){ + std::vector initPrts = %(init_prt_ids)s + std::vector finPrts = %(fin_prt_ids)s +// std::vector initPrts = {"-1"}; +// std::vector finPrts = {"1"}; + if( status == "-1" ){ + return REX::getRefOrder( initPrts, arguments ); + } + else if( status == "1" ){ + return REX::getRefOrder( finPrts, arguments ); + } + return REX::stoiSort( arguments ); + } + +// ZW: SET UP INPUT LHE BLOCK +// ZW: SET UP REX::event FROM LHE BLOCK +// auto 
procEvent = REX::event( procEvent ); + REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; + + std::vector> eventVec = {%(process_event)s}; + REX::event locEv = REX::event( eventVec ); + fbridgeRunner fBridge = fbridgeRunner( locEv ); + + REX::teaw::amplitude scatteringAmp = []( std::vector& momenta, std::vector& alphaS ){ + return fBridge.scatAmp( momenta, alphaS ); + }; + + REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; + + auto runner = rwgt::instance(eventVec, scatteringAmp); + //auto thisProc = runner.getProc( scatteringAmp ); + auto thisProc = runner.process.getProc( currProcSort ); +// ZW: SET UP WRAPPER FOR FORTRAN_BRIDGE + +// ZW: SET UP EVALUATION OF MATRIX ELEMENTS FUNCTION + + +} \ No newline at end of file diff --git a/tools/REX/teawREX.hpp b/tools/REX/teawREX.hpp index 5c2eb2d3cd..e6b2c5f1e3 100644 --- a/tools/REX/teawREX.hpp +++ b/tools/REX/teawREX.hpp @@ -12,19 +12,33 @@ // IF YOU SEE THIS FILE, IT HAS BEEN SPREAD // FROM AN IMPROPER RELEASE. -// Copyright © 2023 CERN, CERN Author Zenny Wettersten. +// Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. // All rights reserved. 
+#ifndef _TEAWREX_HPP_ +#define _TEAWREX_HPP_ + #include #include #include #include #include #include +#include #include "REX.hpp" +#ifndef FORTRANFPTYPE +#define FORTRANFPTYPE double +#endif + namespace REX::teaw { + + using amplitude = std::function>(std::vector&, std::vector&)>; + using ampCall = std::map; + using ampPair = std::pair; + using vecMap = std::map>, REX::eventComp>; + template std::shared_ptr> scatAmpEval(std::vector& momenta, std::function>(std::vector&)> evalFunc) { return evalFunc(momenta); } @@ -89,7 +103,7 @@ namespace REX::teaw { name = title; rwgtVals.resize( values.size() ); - for( int k = 0 ; k < values.size() ; ++k ) + for( size_t k = 0 ; k < values.size() ; ++k ) { rwgtVals[k] = rwgtVal( values[k] ); } @@ -147,7 +161,7 @@ namespace REX::teaw } } rwgtParams.reserve(blocks.size()); - for( int k = 0 ; k < blocks.size() ; ++k ) + for( size_t k = 0 ; k < blocks.size() ; ++k ) { rwgtParams.push_back( rwgtBlock( *params[k], blocks[k] ) ); } @@ -165,7 +179,7 @@ namespace REX::teaw std::shared_ptr outWrite( const REX::lesHouchesCard& paramOrig ){ auto slhaOrig = std::make_shared( paramOrig ); std::map blockIds; - for( int k = 0 ; k < slhaOrig->blocks.size() ; ++k ) + for( size_t k = 0 ; k < slhaOrig->blocks.size() ; ++k ) { slhaOrig->blocks[k].parse( true ); auto nyama = std::pair( slhaOrig->blocks[k].name, k); blockIds.insert( nyama ); } @@ -204,7 +218,7 @@ namespace REX::teaw if( srcCard.find_last_of("#", nuLnch) < srcCard.find_last_of("\n", nuLnch) ){ lnchPos.push_back(nuLnch); } nuLnch = srcCard.find( "launch", nuLnch + 6 ); } - for( int k = 0 ; k < lnchPos.size() - 1 ; ++k ) + for( size_t k = 0 ; k < lnchPos.size() - 1 ; ++k ) { auto strtLi = srcCard.find( "set", lnchPos[k] ); rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( strtLi, lnchPos[k+1] - strtLi ), parseOnline ) ); @@ -289,16 +303,15 @@ namespace REX::teaw lheFile = lhe; lheFileSet = true; } - void setLhe( REX::lheNode lhe ){ + void setLhe( REX::lheNode& lhe ){ if( lheFileSet 
){ return; } setLhe( std::make_shared( lhe ) ); lheFileSet = true; } void setLhe( std::string_view lhe_file ){ if( lheFileSet ){ return; } - size_t strt = 0; - size_t post = *REX::nodeEndFind( lhe_file, strt ); - lheFile = REX::lheParser( lhe_file, strt, post ); + //lheFile = REX::lheParser( lhe_file, strt, post ); + lheFile = std::make_shared( *lheFile ); lheFileSet = true; } std::shared_ptr getRwgt(){ return rwgtSets; } @@ -311,24 +324,33 @@ namespace REX::teaw setRwgt( rwgts ); } protected: - void setDoubles(){ + template + void setDoubles(Args&&... args){ if( lheFile == nullptr || rwgtSets == nullptr || slhaParameters == nullptr ) throw std::runtime_error( "One or more of the necessary files (SLHA parameter card, LHE event storage file, and MadGraph-format reweight card) have not been initialised." ); REX::lheRetDs returnBools; returnBools.xwgtup = true; returnBools.aqcdup = true; returnBools.pup = true; - auto vecOfVecs = REX::lheValDoubles( *lheFile, returnBools ); - if( vecOfVecs->size() != 3 ) - throw std::runtime_error( "LHE file appears to contain multiple types of processes. This has not yet been implemented." ); - wgts = vecOfVecs->at( 0 ); gS = vecOfVecs->at( 1 ); momenta = vecOfVecs->at( 2 ); + eventFile = REX::transLHE( *lheFile, args... ); + auto vecOfVecs = REX::lheValDoubles( eventFile, returnBools ); + if( vecOfVecs->size() != 3 * eventFile.subProcs.size() ) + throw std::runtime_error( "Incorrect number of parameters have been extracted from the LHE file." 
); + //wgts[0] = vecOfVecs->at( 0 ); gS[0] = vecOfVecs->at( 1 ); momenta[0] = vecOfVecs->at( 2 ); + for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + wgts.push_back( vecOfVecs->at( 3*k ) ); + gS.push_back( vecOfVecs->at( 3*k + 1 ) ); + momenta.push_back( vecOfVecs->at( 3*k + 2 ) ); + } } std::shared_ptr rwgtSets; std::shared_ptr slhaParameters; std::shared_ptr lheFile; - std::shared_ptr> wgts; - std::shared_ptr> gS; - std::shared_ptr> momenta; + std::vector>> wgts; + std::vector>> gS; + std::vector>> momenta; bool lheFileSet = false; bool slhaSet = false; bool rwgtSet = false; + REX::transLHE eventFile; }; struct rwgtFiles : rwgtCollection { @@ -341,20 +363,22 @@ namespace REX::teaw setSlhaPath( slha_card ); setLhePath( lhe_card ); } - void initCards(){ + template + void initCards(Args&&... args){ if( rwgtPath == "" || slhaPath == "" || lhePath == "" ) throw std::runtime_error( "Paths to reweight card, parameter card, or LHE file have not been set" ); pullRwgt(); pullSlha(); pullLhe(); setLhe( *lheCard ); setSlha( std::make_shared( *slhaCard ) ); setRwgt( std::make_shared( *rewgtCard, *slhaParameters, true ) ); - setDoubles(); + setDoubles(args...); } - void initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ){ + template + void initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, Args&&... 
args ){ setLhePath( lhe_card ); setSlhaPath( slha_card ); setRwgtPath( reweight_card ); - initCards(); + initCards(args...); } protected: void pullRwgt(){ @@ -376,32 +400,61 @@ namespace REX::teaw struct rwgtRunner : rwgtFiles{ public: - void setMeEval( std::function>(std::vector&, std::vector&)> eval ){ meEval = eval; meInit = true; } + void setMeEval( amplitude eval ){ + meEval = eval; meInit = true; + ampCall nuEvals; + nuEvals.insert( std::pair( *eventFile.subProcs[0]->process, eval ) ); + meEvals = nuEvals; + } + void setMeEvals( ampCall evals ){ meEvals = evals; meCompInit = true; } + void addMeEval( const REX::event& ev, const amplitude& eval ){ meEvals.insert( std::pair( ev, eval ) ); meCompInit = true; } rwgtRunner() : rwgtFiles(){ return; } rwgtRunner( rwgtFiles& rwgts ) : rwgtFiles( rwgts ){ return; } - rwgtRunner( rwgtFiles& rwgts, std::function>(std::vector&, std::vector&)> meCalc ) : rwgtFiles( rwgts ){ + rwgtRunner( rwgtFiles& rwgts, amplitude meCalc ) : rwgtFiles( rwgts ){ meEval = meCalc; meInit = true; } + rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ) : rwgtFiles( rwgts ){ + meEvals = meCalcs; + meCompInit = true; + } rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, - std::function>(std::vector&, std::vector&)> meCalc ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ + amplitude meCalc ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ meEval = meCalc; meInit = true; } + rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + ampCall meCalcs ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ + meEvals = meCalcs; + meCompInit = true; + } + bool oneME(){ return (meInit != meCompInit); } + bool singAmp(){ return (meInit && !meCompInit); } protected: bool meInit = false; + bool meCompInit = false; bool meSet = false; bool normWgtSet = false; - std::function>(std::vector&, std::vector&)> meEval; - std::shared_ptr> initMEs; - std::shared_ptr> 
meNormWgts; + amplitude meEval; + ampCall meEvals; + std::vector>> initMEs; + std::vector>> meNormWgts; + std::shared_ptr> normWgt; std::shared_ptr rwgtGroup; - void setMEs(){ - initCards(); - if( !meInit ) - throw std::runtime_error( "No function for evaluating scattering amplitudes has been provided." ); - auto ins = meEval( *momenta, *gS ); - initMEs = std::make_shared>( ins->begin(), ins->begin() + wgts->size() ); + template + void setMEs(Args&&... args){ + initCards(args...); + if( !oneME() ) + throw std::runtime_error( "No or multiple function(s) for evaluating scattering amplitudes has been provided." ); + //ZW FIX THIS + initMEs = {}; + for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + auto ins = meEvals[eventFile.subProcs[k]]( *(momenta[k]), *(gS[k]) ); + initMEs.push_back( std::make_shared>( ins->begin(), ins->begin() + wgts[k]->size() ) ); + } + //auto ins = meEval( *(momenta[0]), *(gS[0]) ); + //initMEs = {std::make_shared>( ins->begin(), ins->begin() + wgts[0]->size() )}; meSet = true; } bool setParamCard( std::shared_ptr slhaParams ){ @@ -413,14 +466,36 @@ namespace REX::teaw throw std::runtime_error( "Failed to overwrite parameter card." ); return true; } - void setNormWgts(){ - if( !meSet ){ setMEs(); } - if( initMEs->size() != wgts->size() ) - throw std::runtime_error( "Inconsistent number of events and event weights." ); - meNormWgts = std::make_shared>( wgts->size() ); - for( size_t k = 0; k < initMEs->size(); k++ ){ - meNormWgts->at( k ) = wgts->at( k ) / initMEs->at( k ); + void setNormWgtsSingleME(){ + //if( initMEs->size() != wgts[0]->size() ) + // throw std::runtime_error( "Inconsistent number of events and event weights." 
); + meNormWgts = {std::make_shared>( wgts[0]->size() )}; + for( size_t k = 0; k < initMEs[0]->size(); k++ ){ + meNormWgts[0]->at( k ) = wgts[0]->at( k ) / initMEs[0]->at( k ); + } + normWgt = meNormWgts[0]; + } + void setNormWgtsMultiME(){ + meNormWgts = std::vector>>( initMEs.size() ); + for( auto k = 0 ; k < wgts.size() ; ++k ){ + meNormWgts[k] = std::make_shared>( wgts[k]->size() ); + for( auto i = 0 ; i < wgts[k]->size() ; ++i ){ + meNormWgts[k]->at( i ) = wgts[k]->at( i ) / initMEs[k]->at( i ); + } + } + normWgt = eventFile.vectorFlat( meNormWgts ); + } + template + void setNormWgts(Args&&... args){ + if( !oneME() ){ setMEs(args); } + //if( initMEs->size() != wgts[0]->size() ) + // throw std::runtime_error( "Inconsistent number of events and event weights." ); + for( auto k = 0; k < initMEs.size() ; ++k ){ + if( initMEs[k]->size() != wgts[k]->size() ) + throw std::runtime_error( "Inconsistent number of events and event weights." ); } + if( initMEs.size() == 1 ){ setNormWgtsSingleME(); } + else { setNormWgtsMultiME(); } normWgtSet = true; } bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId ){ @@ -428,8 +503,21 @@ namespace REX::teaw throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); if( !setParamCard( slhaParams ) ) throw std::runtime_error( "Failed to rewrite parameter card." 
); - auto newMEs = meEval( *momenta, *gS ); - auto newWGTs = REX::vecElemMult( *newMEs, *meNormWgts ); + std::shared_ptr> newWGTs; + if( singAmp() ){ + auto newMEs = meEval( *momenta[0], *gS[0] ); + newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); + } + else{ + std::vector>> nuMEs = {}; + for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); + } + std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); + newWGTs = REX::vecElemMult( *newMEs, *normWgt ); + } + //ZW IF MULTIPLE TYPES REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); lheFile->addWgt( 0, nuWgt ); return true; @@ -439,8 +527,71 @@ namespace REX::teaw throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); if( !setParamCard( slhaParams ) ) throw std::runtime_error( "Failed to rewrite parameter card." ); - auto newMEs = meEval( *momenta, *gS ); - auto newWGTs = REX::vecElemMult( *newMEs, *meNormWgts ); + std::shared_ptr> newWGTs; + if( singAmp() ){ + auto newMEs = meEval( *momenta[0], *gS[0] ); + newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); + } + else{ + std::vector>> nuMEs = {}; + for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); + } + std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); + newWGTs = REX::vecElemMult( *newMEs, *normWgt ); + } + //ZW IF MULTIPLE TYPES + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); + lheFile->addWgt( 0, nuWgt ); + return true; + } + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, REX::event& ev ){ + if( !normWgtSet ) + throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." 
); + if( !setParamCard( slhaParams ) ) + throw std::runtime_error( "Failed to rewrite parameter card." ); + //auto newMEs = meEval( *momenta, *gS ); + std::shared_ptr> newWGTs; + if( singAmp() ){ + auto newMEs = meEval( *momenta[0], *gS[0] ); + newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); + } + else{ + std::vector>> nuMEs = {}; + for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); + } + std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); + newWGTs = REX::vecElemMult( *newMEs, *normWgt ); + } + //ZW IF MULTIPLE TYPES + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); + lheFile->addWgt( 0, nuWgt ); + return true; + } + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, + std::string& id, REX::event& ev ){ + if( !normWgtSet ) + throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); + if( !setParamCard( slhaParams ) ) + throw std::runtime_error( "Failed to rewrite parameter card." 
); + std::shared_ptr> newWGTs; + if( singAmp() ){ + auto newMEs = meEval( *momenta[0], *gS[0] ); + newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); + } + else{ + std::vector>> nuMEs = {}; + for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); + } + std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); + newWGTs = REX::vecElemMult( *newMEs, *normWgt ); + } + //ZW IF MULTIPLE TYPES REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); lheFile->addWgt( 0, nuWgt ); return true; @@ -456,9 +607,9 @@ namespace REX::teaw setMEs(); setNormWgts(); rwgtGroup = std::make_shared(); - auto currInd = lheFile->header->addWgtGroup( rwgtGroup ); + auto currInd = lheFile->getHeader()->addWgtGroup( rwgtGroup ); auto paramSets = rwgtSets->writeCards( *slhaParameters ); - for( int k = 0 ; k < paramSets.size(); k++ ){ + for( size_t k = 0 ; k < paramSets.size(); k++ ){ singleRwgtIter( paramSets[k], lheFile, k, rwgtSets->rwgtNames[k] ); std::cout << "."; } @@ -467,4 +618,6 @@ namespace REX::teaw std::cout << "\nReweighting done.\n"; } }; -} \ No newline at end of file +} + +#endif \ No newline at end of file From 77ee370de1839d51a628c69432a219ebbcd014a1 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 4 Mar 2024 11:02:16 +0100 Subject: [PATCH 03/76] small fixes to rwgt code --- tools/REX/rwgt_driver.cc | 6 +++--- tools/REX/rwgt_runner.cc | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/REX/rwgt_driver.cc b/tools/REX/rwgt_driver.cc index ee74f097f4..57838a2dd0 100644 --- a/tools/REX/rwgt_driver.cc +++ b/tools/REX/rwgt_driver.cc @@ -5,11 +5,11 @@ //========================================================================== //========================================================================== // This file has been automatically generated for C++ Standalone by -//%(info_lines)s +%(info_lines)s 
//========================================================================== //========================================================================== // Driver for reweighting events for processes -//%(multiprocess_lines)s +%(multiprocess_lines)s //-------------------------------------------------------------------------- #include "rwgt_instance.h" @@ -91,7 +91,7 @@ int main( int argc, char** argv ){ // ZW : include rwgt_instances(s) //%(rwgt_runners)s -// std::vector runSet = {%(run_set)s}; + std::vector runSet = {%(run_set)s}; std::vector runSet; REX::teaw::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); fileCol.initCards(); diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index 8af00324a6..a770bf69aa 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -14,10 +14,10 @@ #include "teawREX.hpp" #include "rwgt_instance.h" -#include "fbridge.cc" // ZW: SET UP NAMESPACE namespace %(process_namespace)s{ +#include "fbridge.cc" //namespace dummy{ struct fbridgeRunner{ From d3815b852a4f4bb1ffcf9688c3e05afe677d25f9 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 4 Mar 2024 10:57:28 +0100 Subject: [PATCH 04/76] changed submodule to my fork --- .gitmodules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 1f00c67701..997b366b8f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "MG5aMC/mg5amcnlo"] path = MG5aMC/mg5amcnlo - url = https://github.com/mg5amcnlo/mg5amcnlo - branch = gpucpp + url = https://github.com/zeniheisser/mg5amcnlo + branch = rexCPP From 333bb5c16d1f9fbbad3dfd7ab2c15578d8ffc0ba Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 4 Mar 2024 12:38:38 +0100 Subject: [PATCH 05/76] fixes to get rwgt exporter working --- .../PLUGIN/CUDACPP_SA_OUTPUT/__init__.py | 1 + .../madgraph/iolibs/template_files/REX | 1 + .../iolibs/template_files/gpu/cudacpp_rex.mk | 1040 +++++++++++++++++ .../CUDACPP_SA_OUTPUT/model_handling.py | 85 +- 
.../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 127 ++ tools/REX/rwgt_driver.inc | 1 + tools/REX/rwgt_runner.inc | 1 + 7 files changed, 1253 insertions(+), 3 deletions(-) create mode 120000 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/REX create mode 100644 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk create mode 120000 tools/REX/rwgt_driver.inc create mode 120000 tools/REX/rwgt_runner.inc diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py index 5bee0c9be0..683f1dfd41 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py @@ -35,6 +35,7 @@ new_output = { 'madevent_simd' : output.SIMD_ProcessExporter, 'madevent_gpu' : output.GPU_ProcessExporter, 'standalone_cudacpp' : output.PLUGIN_ProcessExporter, + 'standalone_rwgtcpp' : output.RWGT_ProcessExporter, # the following one are used for the second exporter class # (not really needed so far but interesting if need # specialization in the futur) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/REX b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/REX new file mode 120000 index 0000000000..1a916a1ca1 --- /dev/null +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/REX @@ -0,0 +1 @@ +../../../../../../../../tools/REX/ \ No newline at end of file diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk new file mode 100644 index 0000000000..efe82df88d --- /dev/null +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk @@ -0,0 +1,1040 @@ +# Copyright (C) 2020-2023 CERN and 
UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. + +#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') + +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk + +#------------------------------------------------------------------------------- + +#=== Use bash in the Makefile (https://www.gnu.org/software/make/manual/html_node/Choosing-the-Shell.html) + +SHELL := /bin/bash + +#------------------------------------------------------------------------------- + +#=== Detect O/S and architecture (assuming uname is available, https://en.wikipedia.org/wiki/Uname) + +# Detect O/S kernel (Linux, Darwin...) +UNAME_S := $(shell uname -s) +###$(info UNAME_S='$(UNAME_S)') + +# Detect architecture (x86_64, ppc64le...) 
+UNAME_P := $(shell uname -p) +###$(info UNAME_P='$(UNAME_P)') + +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif + +#------------------------------------------------------------------------------- + +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +MG5AMC_COMMONLIB = mg5amc_common +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) +override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) +override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) +override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else +override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = 
../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) +TESTDIR = +else ifneq ($(LOCALGTEST),) +TESTDIR=$(TESTDIRLOCAL) +GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) +TESTDIR = $(TESTDIRCOMMON) +GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else +TESTDIR = +endif +ifneq ($(GTEST_ROOT),) +GTESTLIBDIR = $(GTEST_ROOT)/lib64/ +GTESTLIBS = $(GTESTLIBDIR)/libgtest.a $(GTESTLIBDIR)/libgtest_main.a +GTESTINC = -I$(GTEST_ROOT)/include +else +GTESTLIBDIR = +GTESTLIBS = +GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + +#=== Configure the C++ compiler + +CXXFLAGS = $(OPTFLAGS) -std=c++17 $(INCFLAGS) -Wall -Wshadow -Wextra +ifeq ($(shell $(CXX) --version | grep ^nvc++),) +CXXFLAGS += -ffast-math # see issue #117 +endif +###CXXFLAGS+= -Ofast # performance is not different from --fast-math +###CXXFLAGS+= -g # FOR DEBUGGING ONLY + +# Optionally add debug flags to display the full list of flags (eg on Darwin) +###CXXFLAGS+= -v + +# Note: AR, CXX and FC are implicitly defined if not set externally +# See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html + +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + +#------------------------------------------------------------------------------- + +#=== Configure the GPU compiler (CUDA or HIP) + +# FIXME! (AV 24.01.2024) +# In the current implementation (without separate builds for C++ and CUDA/HIP), we first check for cudacc and hipcc in CUDA_HOME and HIP_HOME. +# If CUDA_HOME or HIP_HOME are not set, try to determine them from the path to cudacc and hipcc. 
+# While convoluted, this is currently necessary to allow disabling CUDA/HIP builds by setting CUDA_HOME or HIP_HOME to invalid paths. +# This will (probably?) be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775). + +# If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA builds (issue #505) +# This is because it is impossible to pass this to "GPUFLAGS += -ccbin " below +ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache " from outside + $(warning CUDA builds are not supported for multi-word CXX "$(CXX)") + override CUDA_HOME=disabled +endif + +# If CUDA_HOME is not set, try to set it from the path to nvcc +ifndef CUDA_HOME + CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null)) + $(warning CUDA_HOME was not set: using "$(CUDA_HOME)") +endif + +# If HIP_HOME is not set, try to set it from the path to hipcc +ifndef HIP_HOME + HIP_HOME = $(patsubst %%/bin/hipcc,%%,$(shell which hipcc 2>/dev/null)) + $(warning HIP_HOME was not set: using "$(HIP_HOME)") +endif + +# FIXME! (AV 24.01.2024) +# In the current implementation (without separate builds for C++ and CUDA/HIP), +# builds are performed for HIP only if CUDA is not found in the path. +# If both CUDA and HIP are installed, HIP builds can be triggered by unsetting CUDA_HOME. +# This will be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775). + +#--- Option 1: CUDA exists -> use CUDA + +# Set GPUCC as $(CUDA_HOME)/bin/nvcc if it exists +ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) + + GPUCC = $(CUDA_HOME)/bin/nvcc + USE_NVTX ?=-DUSE_NVTX + # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html + # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). + # Embed device code for 70, and PTX for 70+. 
+ # Export MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to use another value or list of values (see #533). + # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). + MADGRAPH_CUDA_ARCHITECTURE ?= 70 + ###CUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 + ###CUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 + comma:=, + CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + CUINC = -I$(CUDA_HOME)/include/ + CUOPTFLAGS = -lineinfo + ###GPUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math + GPUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math + ###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow + ###GPUCC_VERSION = $(shell $(GPUCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1) + GPUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h + # Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 96 # degrades throughput: 4.1E8 (16384 32 12) up to 4.5E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 64 # degrades throughput: 
1.7E8 (16384 32 12) flat at 1.7E8 (65536 128 12) + CUBUILDRULEFLAGS = -Xcompiler -fPIC -c + CCBUILDRULEFLAGS = -Xcompiler -fPIC -c -x cu + CUDATESTFLAGS = -lcuda + + # Set the host C++ compiler for GPUCC via "-ccbin " + # (NB issue #505: this must be a single word, "clang++ --gcc-toolchain..." is not supported) + GPUFLAGS += -ccbin $(shell which $(subst ccache ,,$(CXX))) + + # Allow newer (unsupported) C++ compilers with older versions of CUDA if ALLOW_UNSUPPORTED_COMPILER_IN_CUDA is set (#504) + ifneq ($(origin ALLOW_UNSUPPORTED_COMPILER_IN_CUDA),undefined) + GPUFLAGS += -allow-unsupported-compiler + endif + +else ifneq ($(origin REQUIRE_CUDA),undefined) + + # If REQUIRE_CUDA is set but no cuda is found, stop here (e.g. for CI tests on GPU #443) + $(error No cuda installation found (set CUDA_HOME or make GPUCC visible in PATH)) + +#--- Option 2: CUDA does not exist, HIP exists -> use HIP + +# Set GPUCC as $(HIP_HOME)/bin/hipcc if it exists +else ifneq ($(wildcard $(HIP_HOME)/bin/hipcc),) + + GPUCC = $(HIP_HOME)/bin/hipcc + #USE_NVTX ?=-DUSE_NVTX # should maybe find something equivalent to this in HIP? + HIPARCHFLAGS = -target x86_64-linux-gnu --offload-arch=gfx90a + HIPINC = -I$(HIP_HOME)/include/ + # Note: -DHIP_FAST_MATH is equivalent to -use_fast_math in HIP + # (but only for single precision line 208: https://rocm-developer-tools.github.io/HIP/hcc__detail_2math__functions_8h_source.html) + # Note: CUOPTFLAGS should not be used for HIP, it had been added here but was then removed (#808) + GPUFLAGS = $(OPTFLAGS) $(INCFLAGS) $(HIPINC) $(HIPARCHFLAGS) -DHIP_FAST_MATH -DHIP_PLATFORM=amd -fPIC + ###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow + GPUFLAGS += -std=c++17 + ###GPUFLAGS+= --maxrregcount 255 # (AV: is this option valid on HIP and meaningful on AMD GPUs?) + CUBUILDRULEFLAGS = -fPIC -c + CCBUILDRULEFLAGS = -fPIC -c -x hip + +else ifneq ($(origin REQUIRE_HIP),undefined) + + # If REQUIRE_HIP is set but no HIP is found, stop here (e.g. 
for CI tests on GPU #443) + $(error No hip installation found (set HIP_HOME or make GPUCC visible in PATH)) + +#--- Option 3: CUDA does not exist, HIP does not exist -> switch off both CUDA and HIP + +else + + # No cudacc and no hipcc: switch CUDA and HIP compilation off and go to common random numbers in C++ + $(warning CUDA_HOME is not set or is invalid: export CUDA_HOME to compile with cuda) + $(warning HIP_HOME is not set or is invalid: export HIP_HOME to compile with hip) + override GPUCC= + override USE_NVTX= + override CUINC= + override HIPINC= + +endif + +# Export GPUCC (so that it can also be used in cudacpp_src.mk?) +export GPUCC +export GPUFLAGS + +#------------------------------------------------------------------------------- + +#=== Configure ccache for C++ and CUDA/HIP builds + +# Enable ccache if USECCACHE=1 +ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) + override CXX:=ccache $(CXX) +endif +#ifeq ($(USECCACHE)$(shell echo $(AR) | grep ccache),1) +# override AR:=ccache $(AR) +#endif +ifneq ($(GPUCC),) + ifeq ($(USECCACHE)$(shell echo $(GPUCC) | grep ccache),1) + override GPUCC:=ccache $(GPUCC) + endif +endif + +#------------------------------------------------------------------------------- + +#=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP + +# PowerPC-specific CXX compiler flags (being reviewed) +ifeq ($(UNAME_P),ppc64le) + CXXFLAGS+= -mcpu=power9 -mtune=power9 # gains ~2-3%% both for none and sse4 + # Throughput references without the extra flags below: none=1.41-1.42E6, sse4=2.15-2.19E6 + ###CXXFLAGS+= -DNO_WARN_X86_INTRINSICS # no change + ###CXXFLAGS+= -fpeel-loops # no change + ###CXXFLAGS+= -funroll-loops # gains ~1%% for none, loses ~1%% for sse4 + ###CXXFLAGS+= -ftree-vectorize # no change + ###CXXFLAGS+= -flto # would increase to none=4.08-4.12E6, sse4=4.99-5.03E6! +else + ###CXXFLAGS+= -flto # also on Intel this would increase throughputs by a factor 2 to 4... 
+ ######CXXFLAGS+= -fno-semantic-interposition # no benefit (neither alone, nor combined with -flto) +endif + +# PowerPC-specific CUDA/HIP compiler flags (to be reviewed!) +ifeq ($(UNAME_P),ppc64le) + GPUFLAGS+= -Xcompiler -mno-float128 +endif + +#------------------------------------------------------------------------------- + +#=== Configure defaults and check if user-defined choices exist for OMPFLAGS, AVX, FPTYPE, HELINL, HRDCOD + +# Set the default OMPFLAGS choice +ifneq ($(findstring hipcc,$(GPUCC)),) +override OMPFLAGS = # disable OpenMP MT when using hipcc #802 +else ifneq ($(shell $(CXX) --version | egrep '^Intel'),) +override OMPFLAGS = -fopenmp +###override OMPFLAGS = # disable OpenMP MT on Intel (was ok without GPUCC but not ok with GPUCC before #578) +else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) +override OMPFLAGS = -fopenmp +###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) +else +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) +endif + +# Set the default AVX (vectorization) choice +ifeq ($(AVX),) + ifeq ($(UNAME_P),ppc64le) + ###override AVX = none + override AVX = sse4 + else ifeq ($(UNAME_P),arm) + ###override AVX = none + override AVX = sse4 + else ifeq ($(wildcard /proc/cpuinfo),) + override AVX = none + $(warning Using AVX='$(AVX)' because host SIMD features cannot be read from /proc/cpuinfo) + else ifeq ($(shell grep -m1 -c avx512vl /proc/cpuinfo)$(shell $(CXX) --version | grep ^clang),1) + override AVX = 512y + ###$(info Using AVX='$(AVX)' as no user input exists) + else + override AVX = avx2 + ifneq ($(shell grep -m1 -c avx512vl /proc/cpuinfo),1) + $(warning Using AVX='$(AVX)' because host does not support avx512vl) + else + $(warning Using AVX='$(AVX)' because this is faster than avx512vl for clang) + endif + endif +else + ###$(info Using AVX='$(AVX)' according to user input) +endif + +# Set the default FPTYPE (floating point type) choice +ifeq ($(FPTYPE),) + override FPTYPE = d +endif + +# Set the default HELINL (inline helicities?) choice +ifeq ($(HELINL),) + override HELINL = 0 +endif + +# Set the default HRDCOD (hardcode cIPD physics parameters?) 
choice +ifeq ($(HRDCOD),) + override HRDCOD = 0 +endif + +# Export AVX, FPTYPE, HELINL, HRDCOD, OMPFLAGS so that it is not necessary to pass them to the src Makefile too +export AVX +export FPTYPE +export HELINL +export HRDCOD +export OMPFLAGS + +#------------------------------------------------------------------------------- + +#=== Configure defaults and check if user-defined choices exist for RNDGEN (legacy!), HASCURAND, HASHIPRAND + +# If the legacy RNDGEN exists, this take precedence over any HASCURAND choice (but a warning is printed out) +###$(info RNDGEN=$(RNDGEN)) +ifneq ($(RNDGEN),) + $(warning Environment variable RNDGEN is no longer supported, please use HASCURAND instead!) + ifeq ($(RNDGEN),hasCurand) + override HASCURAND = $(RNDGEN) + else ifeq ($(RNDGEN),hasNoCurand) + override HASCURAND = $(RNDGEN) + else ifneq ($(RNDGEN),hasNoCurand) + $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported - but use HASCURAND instead!) + endif +endif + +# Set the default HASCURAND (curand random number generator) choice, if no prior choice exists for HASCURAND +# (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) +ifeq ($(HASCURAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASCURAND = hasNoCurand + else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + override HASCURAND = hasCurand + else # non-Nvidia GPU build + override HASCURAND = hasNoCurand + endif +endif + +# Set the default HASHIPRAND (hiprand random number generator) choice, if no prior choice exists for HASHIPRAND +# (NB: allow HASHIPRAND=hasHiprand even if $(GPUCC) does not point to hipcc: assume HIP_HOME was defined correctly...) 
+ifeq ($(HASHIPRAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASHIPRAND = hasNoHiprand + else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build + override HASHIPRAND = hasHiprand + else # non-AMD GPU build + override HASHIPRAND = hasNoHiprand + endif +endif + +# Export HASCURAND, HASHIPRAND so that it is not necessary to pass them to the src Makefile too +# (NB: these variables in cudacpp_src.mk are only used to define the build tag, they are NOT needed for RNDCXXFLAGS or RNDLIBFLAGS) +export HASCURAND +export HASHIPRAND + +#------------------------------------------------------------------------------- + +#=== Set the CUDA/HIP/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD + +# Set the build flags appropriate to OMPFLAGS +$(info OMPFLAGS=$(OMPFLAGS)) +CXXFLAGS += $(OMPFLAGS) + +# Set the build flags appropriate to each AVX choice (example: "make AVX=none") +# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro] +# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] +$(info AVX=$(AVX)) +ifeq ($(UNAME_P),ppc64le) + ifeq ($(AVX),sse4) + override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers) + else ifneq ($(AVX),none) + $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on PowerPC for the moment) + endif +else ifeq ($(UNAME_P),arm) + ifeq ($(AVX),sse4) + override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) + else ifneq ($(AVX),none) + $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on ARM for the moment) + endif +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 + ifeq ($(AVX),none) + override AVXFLAGS = -mno-sse3 # no SIMD + else ifeq ($(AVX),sse4) + override AVXFLAGS = -mno-avx # SSE4.2 with 128 width (xmm registers) + else ifeq ($(AVX),avx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq 
($(AVX),512y) + override AVXFLAGS = -march=skylake -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(AVX),512z) + override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + else + $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) + endif +else + ifeq ($(AVX),none) + override AVXFLAGS = -march=x86-64 # no SIMD (see #588) + else ifeq ($(AVX),sse4) + override AVXFLAGS = -march=nehalem # SSE4.2 with 128 width (xmm registers) + else ifeq ($(AVX),avx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq ($(AVX),512y) + override AVXFLAGS = -march=skylake-avx512 -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(AVX),512z) + override AVXFLAGS = -march=skylake-avx512 -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + else + $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) + endif +endif +# For the moment, use AVXFLAGS everywhere: eventually, use them only in encapsulated implementations? 
+CXXFLAGS+= $(AVXFLAGS) + +# Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") +$(info FPTYPE=$(FPTYPE)) +ifeq ($(FPTYPE),d) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE +else ifeq ($(FPTYPE),f) + CXXFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT +else ifeq ($(FPTYPE),m) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT +else + $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported) +endif + +# Set the build flags appropriate to each HELINL choice (example: "make HELINL=1") +$(info HELINL=$(HELINL)) +ifeq ($(HELINL),1) + CXXFLAGS += -DMGONGPU_INLINE_HELAMPS + GPUFLAGS += -DMGONGPU_INLINE_HELAMPS +else ifneq ($(HELINL),0) + $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported) +endif + +# Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1") +$(info HRDCOD=$(HRDCOD)) +ifeq ($(HRDCOD),1) + CXXFLAGS += -DMGONGPU_HARDCODE_PARAM + GPUFLAGS += -DMGONGPU_HARDCODE_PARAM +else ifneq ($(HRDCOD),0) + $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) +endif + + +#=== Set the CUDA/HIP/C++ compiler and linker flags appropriate to user-defined choices of HASCURAND, HASHIPRAND + +$(info HASCURAND=$(HASCURAND)) +$(info HASHIPRAND=$(HASHIPRAND)) +override RNDCXXFLAGS= +override RNDLIBFLAGS= + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASCURAND choice (example: "make HASCURAND=hasNoCurand") +ifeq ($(HASCURAND),hasNoCurand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_CURAND +else ifeq ($(HASCURAND),hasCurand) + override RNDLIBFLAGS += -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+else + $(error Unknown HASCURAND='$(HASCURAND)': only 'hasCurand' and 'hasNoCurand' are supported) +endif + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASHIPRAND choice (example: "make HASHIPRAND=hasNoHiprand") +ifeq ($(HASHIPRAND),hasNoHiprand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_HIPRAND +else ifeq ($(HASHIPRAND),hasHiprand) + override RNDLIBFLAGS += -L$(HIP_HOME)/lib/ -lhiprand +else + $(error Unknown HASHIPRAND='$(HASHIPRAND)': only 'hasHiprand' and 'hasNoHiprand' are supported) +endif + +#$(info RNDCXXFLAGS=$(RNDCXXFLAGS)) +#$(info RNDLIBFLAGS=$(RNDLIBFLAGS)) + +#------------------------------------------------------------------------------- + +#=== Configure build directories and build lockfiles === + +# Build directory "short" tag (defines target and path to the optional build directory) +# (Rationale: keep directory names shorter, e.g. do not include random number generator choice) +override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) + +# Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) +# (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) +override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASHIPRAND) + +# Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 +ifeq ($(USEBUILDDIR),1) + override BUILDDIR = build.$(DIRTAG) + override LIBDIR = ../../lib/$(BUILDDIR) + override LIBDIRRPATH = '$$ORIGIN/../$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is set = 1)) +else + override BUILDDIR = . 
+ override LIBDIR = ../../lib + override LIBDIRRPATH = '$$ORIGIN/$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is not set)) +endif +###override INCDIR = ../../include +###$(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG)) + +# On Linux, set rpath to LIBDIR to make it unnecessary to use LD_LIBRARY_PATH +# Use relative paths with respect to the executables or shared libraries ($ORIGIN on Linux) +# On Darwin, building libraries with absolute paths in LIBDIR makes this unnecessary +ifeq ($(UNAME_S),Darwin) + override CXXLIBFLAGSRPATH = + override CULIBFLAGSRPATH = + override CXXLIBFLAGSRPATH2 = + override CULIBFLAGSRPATH2 = +else + # RPATH to cuda/cpp libs when linking executables + override CXXLIBFLAGSRPATH = -Wl,-rpath=$(LIBDIRRPATH) + override CULIBFLAGSRPATH = -Xlinker -rpath=$(LIBDIRRPATH) + # RPATH to common lib when linking cuda/cpp libs + override CXXLIBFLAGSRPATH2 = -Wl,-rpath='$$ORIGIN' + override CULIBFLAGSRPATH2 = -Xlinker -rpath='$$ORIGIN' +endif + +# Setting LD_LIBRARY_PATH or DYLD_LIBRARY_PATH in the RUNTIME is no longer necessary (neither on Linux nor on Mac) +override RUNTIME = + +#=============================================================================== +#=== Makefile TARGETS and build rules below +#=============================================================================== + +cxx_main=$(BUILDDIR)/check.exe +fcxx_main=$(BUILDDIR)/fcheck.exe + +ifneq ($(GPUCC),) +cu_main=$(BUILDDIR)/gcheck.exe +fcu_main=$(BUILDDIR)/fgcheck.exe +else +cu_main= +fcu_main= +endif + +testmain=$(BUILDDIR)/runTest.exe + +ifneq ($(GTESTLIBS),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) $(testmain) +else +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) +endif + +# Target (and build options): debug +MAKEDEBUG= +debug: OPTFLAGS = -g -O0 +debug: CUOPTFLAGS = -G +debug: MAKEDEBUG 
:= debug +debug: all.$(TAG) + +# Target: tag-specific build lockfiles +override oldtagsb=`if [ -d $(BUILDDIR) ]; then find $(BUILDDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)' -exec echo $(shell pwd)/{} \; ; fi` +$(BUILDDIR)/.build.$(TAG): + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi + @touch $(BUILDDIR)/.build.$(TAG) + +# Generic target and build rules: objects from CUDA or HIP compilation +# NB: CCBUILDRULEFLAGS includes "-x cu" for nvcc and "-x hip" for hipcc (#810) +ifneq ($(GPUCC),) +$(BUILDDIR)/%%.o : %%.cu *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(GPUCC) $(CPPFLAGS) $(GPUFLAGS) $(CUBUILDRULEFLAGS) $< -o $@ + +$(BUILDDIR)/%%_cu.o : %%.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(GPUCC) $(CPPFLAGS) $(GPUFLAGS) $(CCBUILDRULEFLAGS) $< -o $@ +endif + +# Generic target and build rules: objects from C++ compilation +# (NB do not include CUINC here! add it only for NVTX or curand #679) +$(BUILDDIR)/%%.o : %%.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -fPIC -c $< -o $@ + +# Apply special build flags only to CrossSectionKernel[_cu].o (no fast math, see #117 and #516) +# Added edgecase for HIP compilation +ifeq ($(shell $(CXX) --version | grep ^nvc++),) +$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math +ifeq ($(findstring nvcc,$(GPUCC)),nvcc) + $(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -Xcompiler -fno-fast-math +else + $(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -fno-fast-math +endif +endif + +# Apply special build flags only to check_sa[_cu].o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) +$(BUILDDIR)/check_sa_cu.o: CXXFLAGS += $(USE_NVTX) $(CUINC) + +# Apply special build flags only to check_sa[_cu].o and (Cu|Hip)randRandomNumberKernel[_cu].o +$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/check_sa_cu.o: CUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel_cu.o: CUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cu.o: CUFLAGS += $(RNDCXXFLAGS) +ifeq ($(HASCURAND),hasCurand) # curand headers, #679 +$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) +endif +ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers +$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIPINC) +endif + +# Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... 
instead" in GPUCC with icx2023 (#592) +ifneq ($(shell $(CXX) --version | egrep '^(Intel)'),) +ifneq ($(GPUCC),) +GPUFLAGS += -Wno-deprecated-builtins +endif +endif + +# Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) +# This patch does remove the warning, but I prefer to keep it disabled for the moment... +###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) +###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###ifneq ($(GPUCC),) +###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -Xcompiler -Wno-overriding-t-option +###endif +###endif + +#### Apply special build flags only to CPPProcess.o (-flto) +###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto + +#### Apply special build flags only to CPPProcess.o (AVXFLAGS) +###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) + +#------------------------------------------------------------------------------- + +# Target (and build rules): common (src) library +commonlib : $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so + +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc $(BUILDDIR)/.build.$(TAG) + $(MAKE) -C ../../src $(MAKEDEBUG) -f $(CUDACPP_SRC_MAKEFILE) + +#------------------------------------------------------------------------------- + +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp +cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o + +ifneq ($(GPUCC),) +MG5AMC_CULIB = mg5amc_$(processid_short)_cuda +cu_objects_lib=$(BUILDDIR)/CPPProcess_cu.o $(BUILDDIR)/MatrixElementKernels_cu.o $(BUILDDIR)/BridgeKernels_cu.o $(BUILDDIR)/CrossSectionKernels_cu.o +cu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cu.o 
$(BUILDDIR)/RamboSamplingKernels_cu.o +endif + +# Target (and build rules): C++ and CUDA shared libraries +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) + $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) + +ifneq ($(GPUCC),) +$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o +$(LIBDIR)/lib$(MG5AMC_CULIB).so: cu_objects_lib += $(BUILDDIR)/fbridge_cu.o +$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_objects_lib) + $(GPUCC) --shared -o $@ $(cu_objects_lib) $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# Bypass std::filesystem completely to ease portability on LUMI #803 +#ifneq ($(findstring hipcc,$(GPUCC)),) +# $(GPUCC) --shared -o $@ $(cu_objects_lib) $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs +#else +# $(GPUCC) --shared -o $@ $(cu_objects_lib) $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +#endif +endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): Fortran include files +###$(INCDIR)/%%.inc : ../%%.inc +### @if [ ! 
-d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi +### \cp $< $@ + +#------------------------------------------------------------------------------- + +# Target (and build rules): C++ and CUDA standalone executables +$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o + $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) + +ifneq ($(GPUCC),) +ifneq ($(shell $(CXX) --version | grep ^Intel),) +$(cu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(cu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 +$(cu_main): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +endif +$(cu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cu_main): $(BUILDDIR)/check_sa_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o + $(GPUCC) -o $@ $(BUILDDIR)/check_sa_cu.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(RNDLIBFLAGS) +endif + +#------------------------------------------------------------------------------- + +# Generic target and build rules: objects from Fortran compilation +$(BUILDDIR)/%%.o : %%.f *.inc + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(FC) -I. 
-c $< -o $@ + +# Generic target and build rules: objects from Fortran compilation +###$(BUILDDIR)/%%.o : %%.f *.inc +### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi +### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi +### $(FC) -I. -I$(INCDIR) -c $< -o $@ + +# Target (and build rules): Fortran standalone executables +###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc + +ifeq ($(UNAME_S),Darwin) +$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +endif +$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ +else + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) +endif + +ifneq ($(GPUCC),) +ifneq ($(shell $(CXX) --version | grep ^Intel),) +$(fcu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(fcu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +endif +ifeq ($(UNAME_S),Darwin) +$(fcu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +endif +$(fcu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(fcu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) +ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 + 
$(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 +else + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) +endif +endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): test objects and test executable +$(BUILDDIR)/testxxx.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt +$(testmain): $(BUILDDIR)/testxxx.o +$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions + +ifneq ($(GPUCC),) +$(BUILDDIR)/testxxx_cu.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cu.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cu.o: testxxx_cc_ref.txt +$(testmain): $(BUILDDIR)/testxxx_cu.o +$(testmain): cu_objects_exe += $(BUILDDIR)/testxxx_cu.o # Comment out this line to skip the CUDA test of xxx functions +endif + +$(BUILDDIR)/testmisc.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) +$(testmain): $(BUILDDIR)/testmisc.o +$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests + +ifneq ($(GPUCC),) +$(BUILDDIR)/testmisc_cu.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cu.o: INCFLAGS += $(GTESTINC) +$(testmain): $(BUILDDIR)/testmisc_cu.o +$(testmain): cu_objects_exe += $(BUILDDIR)/testmisc_cu.o # Comment out this line to skip the CUDA miscellaneous tests +endif + +$(BUILDDIR)/runTest.o: $(GTESTLIBS) +$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) +$(testmain): $(BUILDDIR)/runTest.o +$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o + +ifneq ($(GPUCC),) +$(BUILDDIR)/runTest_cu.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cu.o: INCFLAGS += $(GTESTINC) +ifneq ($(shell $(CXX) --version | grep ^Intel),) 
+$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 +$(testmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +endif +$(testmain): $(BUILDDIR)/runTest_cu.o +$(testmain): cu_objects_exe += $(BUILDDIR)/runTest_cu.o +endif + +$(testmain): $(GTESTLIBS) +$(testmain): INCFLAGS += $(GTESTINC) +$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -lgtest_main + +ifneq ($(OMPFLAGS),) +ifneq ($(shell $(CXX) --version | egrep '^Intel'),) +$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) +$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +###$(testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +else +$(testmain): LIBFLAGS += -lgomp +endif +endif + +# Bypass std::filesystem completely to ease portability on LUMI #803 +#ifneq ($(findstring hipcc,$(GPUCC)),) +#$(testmain): LIBFLAGS += -lstdc++fs +#endif + +ifeq ($(GPUCC),) # link only runTest.o +$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) + $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) +else # link both runTest.o and runTest_cu.o +$(testmain): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) $(GTESTLIBS) +ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 + $(FC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) $(CUDATESTFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 +else + $(GPUCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) $(CUDATESTFLAGS) +endif +endif + +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + +# Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 +$(GTESTLIBS): +ifneq ($(shell which flock 2>/dev/null),) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) +else + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi +endif + +#------------------------------------------------------------------------------- + +# Target: build all targets in all AVX modes (each AVX mode in a separate build directory) +# Split the avxall target into five separate targets to allow parallel 'make -j avxall' builds +# (Hack: add a fbridge.inc dependency to avxall, to ensure it is only copied once for all AVX modes) +avxnone: + @echo + $(MAKE) USEBUILDDIR=1 AVX=none -f $(CUDACPP_MAKEFILE) + +avxsse4: + @echo + $(MAKE) USEBUILDDIR=1 AVX=sse4 -f $(CUDACPP_MAKEFILE) + +avxavx2: + @echo + $(MAKE) USEBUILDDIR=1 AVX=avx2 -f $(CUDACPP_MAKEFILE) + +avx512y: + @echo + $(MAKE) USEBUILDDIR=1 AVX=512y -f $(CUDACPP_MAKEFILE) + +avx512z: + @echo + $(MAKE) USEBUILDDIR=1 AVX=512z -f $(CUDACPP_MAKEFILE) + +ifeq ($(UNAME_P),ppc64le) +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 +avxall: avxnone avxsse4 +else ifeq ($(UNAME_P),arm) +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 +avxall: avxnone avxsse4 +else +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 avxavx2 avx512y avx512z +avxall: avxnone avxsse4 avxavx2 avx512y avx512z +endif + +#------------------------------------------------------------------------------- + +# Target: clean the builds +.PHONY: clean + +clean: +ifeq ($(USEBUILDDIR),1) + rm -rf $(BUILDDIR) +else + rm -f $(BUILDDIR)/.build.* $(BUILDDIR)/*.o $(BUILDDIR)/*.exe + rm -f $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(LIBDIR)/lib$(MG5AMC_CULIB).so +endif + $(MAKE) -C ../../src clean -f $(CUDACPP_SRC_MAKEFILE) +### rm -rf $(INCDIR) + +cleanall: + @echo + $(MAKE) USEBUILDDIR=0 clean -f $(CUDACPP_MAKEFILE) + @echo + $(MAKE) USEBUILDDIR=0 -C ../../src cleanall -f $(CUDACPP_SRC_MAKEFILE) + rm -rf build.* + +# Target: clean the builds as well as the gtest installation(s) +distclean: cleanall +ifneq ($(wildcard 
$(TESTDIRCOMMON)),) + $(MAKE) -C $(TESTDIRCOMMON) clean +endif + $(MAKE) -C $(TESTDIRLOCAL) clean + +#------------------------------------------------------------------------------- + +# Target: show system and compiler information +info: + @echo "" + @uname -spn # e.g. Linux nodename.cern.ch x86_64 +ifeq ($(UNAME_S),Darwin) + @sysctl -a | grep -i brand + @sysctl -a | grep machdep.cpu | grep features || true + @sysctl -a | grep hw.physicalcpu: + @sysctl -a | grep hw.logicalcpu: +else + @cat /proc/cpuinfo | grep "model name" | sort -u + @cat /proc/cpuinfo | grep "flags" | sort -u + @cat /proc/cpuinfo | grep "cpu cores" | sort -u + @cat /proc/cpuinfo | grep "physical id" | sort -u +endif + @echo "" +ifneq ($(shell which nvidia-smi 2>/dev/null),) + nvidia-smi -L + @echo "" +endif + @echo USECCACHE=$(USECCACHE) +ifeq ($(USECCACHE),1) + ccache --version | head -1 +endif + @echo "" + @echo GPUCC=$(GPUCC) +ifneq ($(GPUCC),) + $(GPUCC) --version +endif + @echo "" + @echo CXX=$(CXX) +ifneq ($(shell $(CXX) --version | grep ^clang),) + @echo $(CXX) -v + @$(CXX) -v |& egrep -v '(Found|multilib)' + @readelf -p .comment `$(CXX) -print-libgcc-file-name` |& grep 'GCC: (GNU)' | grep -v Warning | sort -u | awk '{print "GCC toolchain:",$$5}' +else + $(CXX) --version +endif + @echo "" + @echo FC=$(FC) + $(FC) --version + +#------------------------------------------------------------------------------- + +# Target: check (run the C++ test executable) +# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] 
+ifneq ($(GPUCC),) +check: runTest cmpFcheck cmpFGcheck +else +check: runTest cmpFcheck +endif + +# Target: runTest (run the C++ test executable runTest.exe) +runTest: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/runTest.exe + +# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +runCheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 + +# Target: runGcheck (run the CUDA standalone executable gcheck.exe, with a small number of events) +runGcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 + +# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +runFcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 + +# Target: runFGcheck (run the Fortran standalone executable - with CUDA MEs - fgcheck.exe, with a small number of events) +runFGcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 + +# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +cmpFcheck: all.$(TAG) + @echo + @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
 Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi + +# Target: cmpFGcheck (compare ME results from the CUDA and Fortran with CUDA MEs standalone executables, with a small number of events) +cmpFGcheck: all.$(TAG) + @echo + @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/CUDA) = $${me1}"; echo "Avg ME (F77/CUDA) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/CUDA) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/CUDA) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi + +# Target: memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) +memcheck: all.$(TAG) + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + +#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index de4d28ad16..dad73e6a6e 
100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -1074,9 +1074,14 @@ def get_process_class_definitions(self, write=True): replace_dict['nincoming'] = nincoming replace_dict['noutcoming'] = nexternal - nincoming replace_dict['nbhel'] = self.matrix_elements[0].get_helicity_combinations() # number of helicity combinations - file = self.read_template_file(self.process_class_template) % replace_dict # HACK! ignore write=False case - file = '\n'.join( file.split('\n')[8:] ) # skip first 8 lines in process_class.inc (copyright) - return file +# file = self.read_template_file(self.process_class_template) % replace_dict # HACK! ignore write=False case +# file = '\n'.join( file.split('\n')[8:] ) # skip first 8 lines in process_class.inc (copyright) + if write: + file = self.read_template_file(self.process_class_template) % replace_dict + file = '\n'.join( file.split('\n')[8:] ) # skip first 8 lines in process_class.inc (copyright) + return file + else: + return replace_dict # AV - replace export_cpp.OneProcessExporterGPU method (fix CPPProcess.cc) def get_process_function_definitions(self, write=True): @@ -1990,3 +1995,77 @@ def generate_helas_call(self, argument): self.add_amplitude(argument.get_call_key(), call_function) #------------------------------------------------------------------------------------ +class PLUGIN_OneProcessExporterRwgt(PLUGIN_OneProcessExporter): + """A custom OneProcessExporter for the REX reweighting""" + + rwgt_template = 'gpu/rwgt_runner.inc' + + # ZW - rwgt functions + def get_rwgt_legs(self, process): + """Return string with particle ids and status in the REX std::pair format""" + return ",".join(["{%i,%i}" % (leg.get('state'), leg.get('id')) \ + for leg in process.get('legs')]).replace('0', '-1') + + def get_init_prts_vec(self, process): + """Return string with initial state particle ids for use in REX event sorting""" + prts = 
",".join(["\"%i\"" % leg.get('id') for leg in process.get('legs') if leg.get('state') == 0]) + return "{" + prts + "}" + + def get_fin_prts_vec(self, process): + """Return string with final state particle ids for use in REX event sorting""" + prts = ",".join(["\"%i\"" % leg.get('id') for leg in process.get('legs') if leg.get('state') == 1]) + return "{" + prts + "}" + + def get_rwgt_procMap(self, process): + """Return string with particle states and order in the REX procMap format""" + currState = False + retString = "thisProc{{\"-1\",{" + for leg in process.get('legs'): + if currState == leg.get('state'): + retString += "\"%i\"," % leg.get('id') + else: + currState = leg.get('state') + retString += "}},{\"1\",{\"%i\"," % leg.get('id') + retString = retString[:-1] + "}}}" + return retString + + def get_proc_dir(self): + """Return process directory name for the current process""" + return "P%d_%s" % (self.process_number, self.process_name) + + def get_rwgt_runner(self): + """Return string to initialise the rwgtRunners in teawREX""" + return "%s::runner" % (self.get_proc_dir()) + + def get_rwgt_includes(self): + """Return string with the include directives for the REX reweighting""" + return "#include \"P%d_%s/rwgt_runner.cc\"" % (self.process_number, self.process_name) + + def edit_rwgt_runner(self): + """Create the rwgt_runner.cc file for the REX reweighting""" + ###misc.sprint('Entering PLUGIN_OneProcessExporterRwgt.edit_rwgt_runner') + # Create the rwgt_runner.cc file +# replace_dict = {} + replace_dict = super().get_process_class_definitions(write=False) + rwgt_runner = self.get_proc_dir() + self.rwgt_template + replace_dict['process_namespace'] = self.get_proc_dir() + replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() + replace_dict['init_prt_ids'] = self.get_init_prts_vec(self.matrix_elements[0].get('processes')[0]) + replace_dict['fin_prt_ids'] = self.get_fin_prts_vec(self.matrix_elements[0].get('processes')[0]) + 
replace_dict['process_event'] = self.get_rwgt_legs(self.matrix_elements[0].get('processes')[0]) + template = open(pjoin(self.template_path,'REX', 'rwgt_runner.inc'),'r').read() + ff = open(pjoin(self.path, 'rwgt_runner.cc'),'w') + ff.write(template % replace_dict) + ff.close() + + # ZW - override the PLUGIN method to generate the rwgt_runner.cc file as well + # note: also generating standard check_sa.cc and gcheck_sa.cu files, which + # are not used in the REX reweighting + def generate_process_files(self): + """Generate mgOnGpuConfig.h, CPPProcess.cc, CPPProcess.h, check_sa.cc, gXXX.cu links""" + misc.sprint('Entering RWGT_OneProcessExporter.generate_process_files') + super().generate_process_files() + misc.sprint('Generating rwgt_runner file') + self.edit_rwgt_runner() + misc.sprint('Finished generating rwgt files') + diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index 85e49ffba9..79454f5d26 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -337,3 +337,130 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): return out #------------------------------------------------------------------------------------ + +class RWGT_ProcessExporter(PLUGIN_ProcessExporter): + + oneprocessclass = model_handling.PLUGIN_OneProcessExporterRwgt + + rwgt_names = [] + proc_lines = [] + + + s = PLUGINDIR + '/madgraph/iolibs/template_files/' + from_template = {'.': [s+'.clang-format', s+'CMake/CMakeLists.txt', + s+'COPYRIGHT', s+'COPYING', s+'COPYING.LESSER' ], + 'CMake': [s+'CMake/Compilers.txt', s+'CMake/Platforms.txt', s+'CMake/Macros.txt'], + 'src': [s+'gpu/rambo.h', s+'read_slha.h', s+'read_slha.cc', + s+'gpu/mgOnGpuFptypes.h', s+'gpu/mgOnGpuCxtypes.h', s+'gpu/mgOnGpuVectors.h', + s+'CMake/src/CMakeLists.txt' ], + 'SubProcesses': [s+'gpu/nvtx.h', s+'gpu/timer.h', s+'gpu/timermap.h', + 
s+'gpu/ompnumthreads.h', s+'gpu/GpuRuntime.h', s+'gpu/GpuAbstraction.h', + s+'gpu/MemoryAccessHelpers.h', s+'gpu/MemoryAccessVectors.h', + s+'gpu/MemoryAccessMatrixElements.h', s+'gpu/MemoryAccessMomenta.h', + s+'gpu/MemoryAccessRandomNumbers.h', s+'gpu/MemoryAccessWeights.h', + s+'gpu/MemoryAccessAmplitudes.h', s+'gpu/MemoryAccessWavefunctions.h', + s+'gpu/MemoryAccessGs.h', s+'gpu/MemoryAccessCouplingsFixed.h', + s+'gpu/MemoryAccessNumerators.h', s+'gpu/MemoryAccessDenominators.h', + s+'gpu/EventStatistics.h', s+'gpu/CommonRandomNumbers.h', + s+'gpu/CrossSectionKernels.cc', s+'gpu/CrossSectionKernels.h', + s+'gpu/MatrixElementKernels.cc', s+'gpu/MatrixElementKernels.h', + s+'gpu/RamboSamplingKernels.cc', s+'gpu/RamboSamplingKernels.h', + s+'gpu/RandomNumberKernels.h', s+'gpu/CommonRandomNumberKernel.cc', + s+'gpu/CurandRandomNumberKernel.cc', s+'gpu/HiprandRandomNumberKernel.cc', + s+'gpu/Bridge.h', s+'gpu/BridgeKernels.cc', s+'gpu/BridgeKernels.h', + s+'gpu/fbridge.cc', s+'gpu/fbridge.inc', s+'gpu/fsampler.cc', s+'gpu/fsampler.inc', + s+'gpu/MadgraphTest.h', s+'gpu/runTest.cc', + s+'gpu/testmisc.cc', s+'gpu/testxxx_cc_ref.txt', + s+'gpu/perf.py', s+'gpu/profile.sh', + s+'CMake/SubProcesses/CMakeLists.txt'], + 'test': [s+'gpu/cudacpp_test.mk']} + + from_template['SubProcesses'].append(s+'REX/rwgt_instance.h') + from_template['SubProcesses'].append(s+'REX/REX.hpp') + from_template['SubProcesses'].append(s+'REX/teawREX.hpp') + + to_link_in_P = ['nvtx.h', 'timer.h', 'timermap.h', + 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', + 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', + 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', + 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', + 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', + 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', + 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', + 'EventStatistics.h', 'CommonRandomNumbers.h', + 'CrossSectionKernels.cc', 
'CrossSectionKernels.h', + 'MatrixElementKernels.cc', 'MatrixElementKernels.h', + 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', + 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', + 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', + 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', + 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', + 'MadgraphTest.h', 'runTest.cc', + 'testmisc.cc', 'testxxx_cc_ref.txt', + 'cudacpp.mk', # this is generated from a template in Subprocesses but we still link it in P1 + 'testxxx.cc', # this is generated from a template in Subprocesses but we still link it in P1 + 'MemoryBuffers.h', # this is generated from a template in Subprocesses but we still link it in P1 + 'MemoryAccessCouplings.h', # this is generated from a template in Subprocesses but we still link it in P1 + 'perf.py', 'profile.sh'] + + to_link_in_P.append('rwgt_instance.h') + to_link_in_P.append('REX.hpp') + to_link_in_P.append('teawREX.hpp') + + template_Sub_make = pjoin(PLUGINDIR, 'madgraph', 'iolibs', 'template_files','gpu','cudacpp_rex.mk') + + # def generate_subprocess_directory(self, subproc_group, fortran_model, me=None): + # misc.sprint('Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory)') + # misc.sprint(' type(subproc_group)=%s'%type(subproc_group)) # e.g. madgraph.core.helas_objects.HelasMatrixElement + # misc.sprint(' type(fortran_model)=%s'%type(fortran_model)) # e.g. madgraph.iolibs.helas_call_writers.GPUFOHelasCallWriter + # misc.sprint(' type(me)=%s me=%s'%(type(me) if me is not None else None, me)) # e.g. 
int + # return super().generate_subprocess_directory(subproc_group, fortran_model, me) + + def generate_subprocess_directory(self, matrix_element, cpp_helas_call_writer, + proc_number=None): + """Generate the Pxxxxx directory for a subprocess in C++ standalone, + including the necessary .h and .cc files""" + + + process_exporter_cpp = self.oneprocessclass(matrix_element,cpp_helas_call_writer) + + self.rwgt_names.append("P%d_%s" % (process_exporter_cpp.process_number, + process_exporter_cpp.process_name)) + + process_lines = "\n".join([process_exporter_cpp.get_process_info_lines(me) for me in \ + process_exporter_cpp.matrix_elements]) + self.proc_lines.append(process_lines) + + # Create the directory PN_xx_xxxxx in the specified path + dirpath = pjoin(self.dir_path, 'SubProcesses', "P%d_%s" % (process_exporter_cpp.process_number, + process_exporter_cpp.process_name)) + try: + os.mkdir(dirpath) + except os.error as error: + logger.warning(error.strerror + " " + dirpath) + + with misc.chdir(dirpath): + logger.info('Creating files in directory %s' % dirpath) + process_exporter_cpp.path = dirpath + # Create the process .h and .cc files + process_exporter_cpp.generate_process_files() + for file in self.to_link_in_P: + files.ln('../%s' % file) + return + + def export_driver(self): + replace_dict = {} + replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() + replace_dict['multiprocess_lines'] = "\n".join(self.proc_lines) + replace_dict['include_lines'] = '' + replace_dict['run_set'] = '' + for name in self.rwgt_names: + replace_dict['include_lines'] += '#include "%s/rwgt_runner.cc"\n' % name + replace_dict['run_set'] += '%s::runner,' % name + replace_dict['run_set'] = replace_dict['run_set'][:-1] + template_path = os.path.join( PLUGINDIR, 'madgraph', 'iolibs', 'template_files' ) + template = open(pjoin(template_path,'REX', 'rwgt_driver.inc'),'r').read() + ff = open(pjoin(self.dir_path, 'SubProcesses', 'rwgt_driver.cc'),'w') + ff.write(template % replace_dict) 
+ ff.close() + diff --git a/tools/REX/rwgt_driver.inc b/tools/REX/rwgt_driver.inc new file mode 120000 index 0000000000..77a39010fd --- /dev/null +++ b/tools/REX/rwgt_driver.inc @@ -0,0 +1 @@ +rwgt_driver.cc \ No newline at end of file diff --git a/tools/REX/rwgt_runner.inc b/tools/REX/rwgt_runner.inc new file mode 120000 index 0000000000..ff1267c3dc --- /dev/null +++ b/tools/REX/rwgt_runner.inc @@ -0,0 +1 @@ +rwgt_runner.cc \ No newline at end of file From 00500261c88dc3f723cc4ef27832e32d9106fa4b Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Wed, 6 Mar 2024 11:15:53 +0100 Subject: [PATCH 06/76] small modifications and added files, checking fbridge which currently does ntot compile(?) --- .gitmodules | 2 +- MG5aMC/mg5amcnlo | 2 +- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 2 + tools/REX/10simevs.lhe | 550 +++++ tools/REX/2diffevs.lhe | 400 ++++ tools/REX/2simevs.lhe | 407 ++++ tools/REX/REX.hpp | 17 +- tools/REX/rwgt_driver.cc | 3 +- tools/REX/rwgt_runner.cc | 4 +- tools/REX/teawREX.hpp | 4 +- tools/REX/tester.cpp | 62 + tools/REX/unweighted_events.lhe | 1870 +++++++++++++++++ 12 files changed, 3306 insertions(+), 17 deletions(-) create mode 100644 tools/REX/10simevs.lhe create mode 100644 tools/REX/2diffevs.lhe create mode 100644 tools/REX/2simevs.lhe create mode 100644 tools/REX/tester.cpp create mode 100644 tools/REX/unweighted_events.lhe diff --git a/.gitmodules b/.gitmodules index 997b366b8f..6fbb5110b6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "MG5aMC/mg5amcnlo"] path = MG5aMC/mg5amcnlo - url = https://github.com/zeniheisser/mg5amcnlo + url = https://github.com/zeniheisser/mg5amcnlo/ branch = rexCPP diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index e428e38c67..a458c4c92b 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit e428e38c6798093f7077ba3ea6e554858ebcf1ab +Subproject commit a458c4c92b1887bb006f5b99b0c94059ec2c29fa diff --git 
a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index 79454f5d26..229a7dac94 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -449,6 +449,8 @@ def generate_subprocess_directory(self, matrix_element, cpp_helas_call_writer, return def export_driver(self): + misc.sprint("In export_driver") + misc.sprint("Current working directory is: %s" % self.dir_path) replace_dict = {} replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() replace_dict['multiprocess_lines'] = "\n".join(self.proc_lines) diff --git a/tools/REX/10simevs.lhe b/tools/REX/10simevs.lhe new file mode 100644 index 0000000000..23432065d5 --- /dev/null +++ b/tools/REX/10simevs.lhe @@ -0,0 +1,550 @@ + +
+ + +3.5.2 + + + t t~ > w+ b w- b~ +output +]]> + + +#********************************************************************* +# MadGraph/MadEvent * +# http://madgraph.hep.uiuc.edu * +# * +# proc_card.dat * +#********************************************************************* +# * +# This Files is generated by MADGRAPH 5 * +# * +# WARNING: This Files is generated for MADEVENT (compatibility issue)* +# This files is NOT a valid MG4 proc_card.dat * +# Running this in MG4 will NEVER reproduce the result of MG5* +# * +#********************************************************************* +#********************************************************************* +# Process(es) requested : mg2 input * +#********************************************************************* +# Begin PROCESS # This is TAG. Do not modify this line +g g > t t~ > w+ b w- b~ #Process +# Be carefull the coupling are here in MG5 convention + +end_coup # End the couplings input + +done # this tells MG there are no more procs +# End PROCESS # This is TAG. Do not modify this line +#********************************************************************* +# Model information * +#********************************************************************* +# Begin MODEL # This is TAG. Do not modify this line +sm +# End MODEL # This is TAG. Do not modify this line +#********************************************************************* +# Start multiparticle definitions * +#********************************************************************* +# Begin MULTIPARTICLES # This is TAG. Do not modify this line + +# End MULTIPARTICLES # This is TAG. Do not modify this line + + + + + +###################################################################### +## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +###################################################################### +## ## +## Width set on Auto will be computed following the information ## +## present in the decay.py files of the model. 
## +## See arXiv:1402.1178 for more details. ## +## ## +###################################################################### + +################################### +## INFORMATION FOR MASS +################################### +Block mass + 5 4.700000e+00 # MB + 6 1.730000e+02 # MT + 15 1.777000e+00 # MTA + 23 9.118800e+01 # MZ + 25 1.250000e+02 # MH +## Dependent parameters, given by model restrictions. +## Those values should be edited following the +## analytical expression. MG5 ignores those values +## but they are important for interfacing the output of MG5 +## to external program such as Pythia. + 1 0.000000e+00 # d : 0.0 + 2 0.000000e+00 # u : 0.0 + 3 0.000000e+00 # s : 0.0 + 4 0.000000e+00 # c : 0.0 + 11 0.000000e+00 # e- : 0.0 + 12 0.000000e+00 # ve : 0.0 + 13 0.000000e+00 # mu- : 0.0 + 14 0.000000e+00 # vm : 0.0 + 16 0.000000e+00 # vt : 0.0 + 21 0.000000e+00 # g : 0.0 + 22 0.000000e+00 # a : 0.0 + 24 8.041900e+01 # w+ : cmath.sqrt(MZ__exp__2/2. + cmath.sqrt(MZ__exp__4/4. - (aEW*cmath.pi*MZ__exp__2)/(Gf*sqrt__2))) + +################################### +## INFORMATION FOR SMINPUTS +################################### +Block sminputs + 1 1.325070e+02 # aEWM1 + 2 1.166390e-05 # Gf + 3 1.180000e-01 # aS (Note that Parameter not used if you use a PDF set) + +################################### +## INFORMATION FOR YUKAWA +################################### +Block yukawa + 5 4.700000e+00 # ymb + 6 1.730000e+02 # ymt + 15 1.777000e+00 # ymtau + +################################### +## INFORMATION FOR DECAY +################################### +DECAY 6 1.491500e+00 # WT +DECAY 23 2.441404e+00 # WZ +DECAY 24 2.047600e+00 # WW +DECAY 25 6.382339e-03 # WH +## Dependent parameters, given by model restrictions. +## Those values should be edited following the +## analytical expression. MG5 ignores those values +## but they are important for interfacing the output of MG5 +## to external program such as Pythia. 
+DECAY 1 0.000000e+00 # d : 0.0 +DECAY 2 0.000000e+00 # u : 0.0 +DECAY 3 0.000000e+00 # s : 0.0 +DECAY 4 0.000000e+00 # c : 0.0 +DECAY 5 0.000000e+00 # b : 0.0 +DECAY 11 0.000000e+00 # e- : 0.0 +DECAY 12 0.000000e+00 # ve : 0.0 +DECAY 13 0.000000e+00 # mu- : 0.0 +DECAY 14 0.000000e+00 # vm : 0.0 +DECAY 15 0.000000e+00 # ta- : 0.0 +DECAY 16 0.000000e+00 # vt : 0.0 +DECAY 21 0.000000e+00 # g : 0.0 +DECAY 22 0.000000e+00 # a : 0.0 + + +# Number of Events : 10 +# Integrated weight (pb) : 439.19338 + +
+ +2212 2212 6.500000e+03 6.500000e+03 0 0 247000 247000 -4 1 +4.391934e+02 3.661122e+00 4.391934e+02 1 +please cite 1405.0301 + + + 8 1 +4.3919338e+02 2.18409400e+02 7.54677100e-03 1.13637100e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.5753848612e+02 1.5753848612e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -4.2590181999e+02 4.2590181999e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 6 2 1 2 501 0 +1.2998184096e+02 -6.4884027876e+00 -2.9537542099e+02 3.6336687781e+02 1.6688759754e+02 0.0000e+00 0.0000e+00 + -6 2 1 2 0 502 -1.2998184096e+02 +6.4884027876e+00 +2.7012087117e+01 2.2007342830e+02 1.7540034961e+02 0.0000e+00 0.0000e+00 + 24 1 3 3 0 0 +5.0317013823e+00 -2.3598693140e+01 -1.3935351491e+02 1.6269245345e+02 8.0419002446e+01 0.0000e+00 -1.0000e+00 + 5 1 3 3 501 0 +1.2495013958e+02 +1.7110290353e+01 -1.5602190608e+02 2.0067442436e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 4 4 0 0 -2.8600069986e+01 +2.4574536812e+01 -3.9691506658e+01 9.7285679922e+01 8.0419002446e+01 0.0000e+00 -1.0000e+00 + -5 1 4 4 0 502 -1.0138177097e+02 -1.8086134024e+01 +6.6703593775e+01 1.2278774838e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.21669541E+03 +0 + 1 21 0.24236690E-01 0.21840939E+03 + 1 21 0.65523357E-01 0.21840939E+03 + 0.33953413E+04 + + + + 8 1 +4.3919338e+02 3.15887700e+02 7.54677100e-03 1.07761700e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +5.3238340901e+02 5.3238340901e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -2.1052403255e+02 2.1052403255e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -6 2 1 2 0 503 +6.6382935030e+01 +2.5548643690e+02 +3.7219413376e+01 3.1807282501e+02 1.7350799341e+02 0.0000e+00 0.0000e+00 + 6 2 1 2 501 0 -6.6382935030e+01 -2.5548643690e+02 +2.8463996308e+02 4.2483461655e+02 1.7258194961e+02 0.0000e+00 0.0000e+00 + 24 1 4 4 0 0 +1.6661005347e-01 -1.8051085807e+02 +2.4998404651e+02 
3.1865880986e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 4 4 501 0 -6.6549545084e+01 -7.4975578835e+01 +3.4655916570e+01 1.0617580669e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 +7.2402758048e+01 +1.6925425433e+02 -3.6729050251e+01 2.0421900271e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 3 3 0 503 -6.0198230171e+00 +8.6232182571e+01 +7.3948463627e+01 1.1385382230e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.31576070E+03 +0 + 1 21 0.81905139E-01 0.31588770E+03 + 1 21 0.32388313E-01 0.31588770E+03 + 0.11189986E+04 + + + + 7 1 +4.3919338e+02 2.51159400e+02 7.54677100e-03 1.11876800e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +8.2851295259e+01 8.2851295259e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -2.1984564692e+03 2.1984564692e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -6 2 1 2 0 502 -7.7080486205e+01 -1.0172807019e+02 -7.8449770191e+01 2.2872812745e+02 1.7283572612e+02 0.0000e+00 0.0000e+00 + 24 1 1 2 0 0 +5.9800337718e+00 +7.0764350150e+01 -4.1759378302e+02 4.3115558402e+02 8.0419002446e+01 0.0000e+00 1.0000e+00 + 5 1 1 2 501 0 +7.1100452433e+01 +3.0963720041e+01 -1.6195616207e+03 1.6214240530e+03 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 -4.7255780753e+01 +1.4691445127e+01 -3.8855826763e+01 1.0210748553e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 3 3 0 502 -2.9824705452e+01 -1.1641951532e+02 -3.9593943428e+01 1.2662064191e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.24154488E+03 +0 + 1 21 0.12746352E-01 0.25115937E+03 + 1 21 0.33822410E+00 0.25115937E+03 + 0.10034989E+03 + + + + 8 1 +4.3919338e+02 1.78714900e+02 7.54677100e-03 1.16958300e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +4.3091677531e+02 4.3091677531e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -1.0391889102e+02 1.0391889102e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -6 2 1 2 0 503 
-3.6067364068e+01 -1.4649928734e+01 +1.7471337281e+01 1.7742037393e+02 1.7221285172e+02 0.0000e+00 0.0000e+00 + 6 2 1 2 501 0 +3.6067364068e+01 +1.4649928734e+01 +3.0952654701e+02 3.5741529240e+02 1.7442342955e+02 0.0000e+00 0.0000e+00 + 24 1 4 4 0 0 +2.2734065795e+01 +3.3518972368e+01 +6.3307765789e+01 1.1007019769e+02 8.0419002446e+01 0.0000e+00 1.0000e+00 + 5 1 4 4 501 0 +1.3333298273e+01 -1.8869043633e+01 +2.4621878122e+02 2.4734509471e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 +3.4800320870e+01 +1.3510658921e+01 +4.0711502437e+01 9.7561481242e+01 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 3 3 0 503 -7.0867684937e+01 -2.8160587655e+01 -2.3240165156e+01 7.9858892692e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.17817321E+03 +0 + 1 21 0.66294888E-01 0.17871488E+03 + 1 21 0.15987522E-01 0.17871488E+03 + 0.73335693E+04 + + + + 8 1 +4.3919338e+02 2.04872300e+02 7.54677100e-03 1.14579500e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +3.7895944857e+02 3.7895944857e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -4.5403273068e+02 4.5403273068e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 6 2 1 2 501 0 -9.4158773160e+01 +4.2436191949e+01 -3.9984076151e+02 4.4927194413e+02 1.7693484753e+02 0.0000e+00 0.0000e+00 + -6 2 1 2 0 502 +9.4158773160e+01 -4.2436191949e+01 +3.2476747940e+02 3.8372023512e+02 1.7635361696e+02 0.0000e+00 0.0000e+00 + 24 1 3 3 0 0 -1.0155138529e+02 +3.1755977097e+01 -4.1062842852e+02 4.3174535111e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 3 3 501 0 +7.3926121347e+00 +1.0680214852e+01 +1.0787667014e+01 1.7526593025e+01 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 4 4 0 0 +2.8155867650e+01 -1.5152453482e+01 +2.9281756166e+02 3.0533864823e+02 8.0419002446e+01 0.0000e+00 1.0000e+00 + -5 1 4 4 0 502 +6.6002905510e+01 -2.7283738467e+01 +3.1949917737e+01 7.8381586893e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.20474671E+03 +0 + 1 21 0.58301453E-01 
0.20487227E+03 + 1 21 0.69851190E-01 0.20487227E+03 + 0.50816560E+03 + + + + 8 1 +4.3919338e+02 2.01015900e+02 7.54677100e-03 1.15150100e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +6.0319621128e+02 6.0319621128e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -9.8768974383e+01 9.8768974383e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -6 2 1 2 0 503 +1.4738675699e+01 +7.6352852286e+01 +4.6116481070e+01 2.0623803560e+02 1.8536561409e+02 0.0000e+00 0.0000e+00 + 6 2 1 2 501 0 -1.4738675699e+01 -7.6352852286e+01 +4.5831075582e+02 4.9572715006e+02 1.7219080054e+02 0.0000e+00 0.0000e+00 + 24 1 4 4 0 0 -4.3843112218e+00 -4.1059533654e+00 +3.8045368848e+02 3.8890655185e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 4 4 501 0 -1.0354364477e+01 -7.2246898921e+01 +7.7857067340e+01 1.0682059821e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 +2.7266458026e+01 +1.2364655742e+02 +4.6653600940e+01 1.5708502580e+02 8.0419002446e+01 0.0000e+00 1.0000e+00 + -5 1 3 3 0 503 -1.2527782328e+01 -4.7293705129e+01 -5.3711986978e-01 4.9153009803e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.19792528E+03 +0 + 1 21 0.92799415E-01 0.20101591E+03 + 1 21 0.15195227E-01 0.20101591E+03 + 0.37319721E+04 + + + + 8 1 +4.3919338e+02 1.74602100e+02 7.54677100e-03 1.17351100e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +9.1248808608e+02 9.1248808608e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -3.9355927787e+01 3.9355927787e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 6 2 1 2 501 0 -2.5269092142e+00 -7.0232873774e+00 +2.4558729161e+02 3.0046206483e+02 1.7294109624e+02 0.0000e+00 0.0000e+00 + -6 2 1 2 0 502 +2.5269092142e+00 +7.0232873774e+00 +6.2754486669e+02 6.5138194904e+02 1.7444246039e+02 0.0000e+00 0.0000e+00 + 24 1 3 3 0 0 +6.0339304741e+01 -1.2037996311e+01 +1.0278716341e+02 1.4428500258e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 3 3 501 0 
-6.2866213955e+01 +5.0147089339e+00 +1.4280012819e+02 1.5617706226e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 4 4 0 0 -2.3438702161e+01 -4.5120986152e+01 +2.2033674694e+02 2.4000161283e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 4 4 0 502 +2.5965611375e+01 +5.2144273529e+01 +4.0720811975e+02 4.1138033621e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.17422586E+03 +0 + 1 21 0.14038277E+00 0.17460207E+03 + 1 21 0.60547588E-02 0.17460207E+03 + 0.69618335E+04 + + + + 8 1 +4.3919338e+02 1.88007500e+02 7.54677100e-03 1.16157200e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +6.3582462508e+02 6.3582462508e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -1.6129828482e+02 1.6129828482e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -6 2 1 2 0 503 +4.8116344176e+01 -2.9059974330e+01 -8.6672370246e+01 2.0702396972e+02 1.7940780248e+02 0.0000e+00 0.0000e+00 + 6 2 1 2 501 0 -4.8116344176e+01 +2.9059974330e+01 +5.6119871050e+02 5.9009894018e+02 1.7353127052e+02 0.0000e+00 0.0000e+00 + 24 1 4 4 0 0 -1.6998242631e+01 +4.7411725194e+01 +1.5136221811e+02 1.7864643563e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 4 4 501 0 -3.1118101545e+01 -1.8351750864e+01 +4.0983649239e+02 4.1145250455e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 +3.9713907948e+01 -3.9675651300e+00 +2.4165830325e+01 9.2973863893e+01 8.0419002446e+01 0.0000e+00 -1.0000e+00 + -5 1 3 3 0 503 +8.4024362278e+00 -2.5092409200e+01 -1.1083820057e+02 1.1405010583e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.18659178E+03 +0 + 1 21 0.97819172E-01 0.18800751E+03 + 1 21 0.24815121E-01 0.18800751E+03 + 0.13327893E+04 + + + + 8 1 +4.3919338e+02 2.47273800e+02 7.54677100e-03 1.11506100e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +4.7922758970e+02 4.7922758970e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -1.2754705753e+02 1.2754705753e+02 0.0000000000e+00 
0.0000e+00 -1.0000e+00 + 6 2 1 2 501 0 +1.6471793564e+02 +6.3009040540e+01 +1.7155189565e+02 3.0095574765e+02 1.7332677701e+02 0.0000e+00 0.0000e+00 + -6 2 1 2 0 502 -1.6471793564e+02 -6.3009040540e+01 +1.8012863652e+02 3.0581889959e+02 1.7313791075e+02 0.0000e+00 0.0000e+00 + 24 1 3 3 0 0 +5.6303415524e+01 +9.2300656218e+01 +8.8725358462e+01 1.6133471705e+02 8.0419002446e+01 0.0000e+00 -1.0000e+00 + 5 1 3 3 501 0 +1.0841452012e+02 -2.9291615679e+01 +8.2826537186e+01 1.3962103059e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 4 4 0 0 -1.6187054837e+02 -9.3582557312e+01 +1.1528314764e+02 2.3391705698e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 4 4 0 502 -2.8473872739e+00 +3.0573516772e+01 +6.4845488879e+01 7.1901842605e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.24724066E+03 +0 + 1 21 0.73727321E-01 0.24727375E+03 + 1 21 0.19622624E-01 0.24727375E+03 + 0.38478950E+04 + + + + 8 1 +4.3919338e+02 1.82375300e+02 7.54677100e-03 1.16561400e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +3.2305192784e+02 3.2305192784e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -1.3439332851e+02 1.3439332851e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -6 2 1 2 0 503 +4.6712402287e+01 -3.3471733509e+01 -1.6591366530e+01 1.8312847738e+02 1.7308483560e+02 0.0000e+00 0.0000e+00 + 6 2 1 2 501 0 -4.6712402287e+01 +3.3471733509e+01 +2.0524996585e+02 2.7431677897e+02 1.7268393460e+02 0.0000e+00 0.0000e+00 + 24 1 4 4 0 0 -5.0429547514e+01 +6.7234938560e+01 +2.0963658148e+02 2.3974650878e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 4 4 501 0 +3.7171452269e+00 -3.3763205051e+01 -4.3866156262e+00 3.4570270185e+01 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 +2.0594294555e+01 +4.7013575059e+01 +5.4619595756e+00 9.5558621614e+01 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 3 3 0 503 +2.6118107732e+01 -8.0485308568e+01 -2.2053326106e+01 8.7569855767e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 
0.18228016E+03 +0 + 1 21 0.49700296E-01 0.18237534E+03 + 1 21 0.20675897E-01 0.18237534E+03 + 0.84041211E+04 + + +
diff --git a/tools/REX/2diffevs.lhe b/tools/REX/2diffevs.lhe new file mode 100644 index 0000000000..634129df21 --- /dev/null +++ b/tools/REX/2diffevs.lhe @@ -0,0 +1,400 @@ + +
+ + +3.5.2 + + + 3j +output +]]> + + +#********************************************************************* +# MadGraph/MadEvent * +# http://madgraph.hep.uiuc.edu * +# * +# proc_card.dat * +#********************************************************************* +# * +# This Files is generated by MADGRAPH 5 * +# * +# WARNING: This Files is generated for MADEVENT (compatibility issue)* +# This files is NOT a valid MG4 proc_card.dat * +# Running this in MG4 will NEVER reproduce the result of MG5* +# * +#********************************************************************* +#********************************************************************* +# Process(es) requested : mg2 input * +#********************************************************************* +# Begin PROCESS # This is TAG. Do not modify this line +p p > 3j #Process +# Be carefull the coupling are here in MG5 convention + +end_coup # End the couplings input + +done # this tells MG there are no more procs +# End PROCESS # This is TAG. Do not modify this line +#********************************************************************* +# Model information * +#********************************************************************* +# Begin MODEL # This is TAG. Do not modify this line +sm +# End MODEL # This is TAG. Do not modify this line +#********************************************************************* +# Start multiparticle definitions * +#********************************************************************* +# Begin MULTIPARTICLES # This is TAG. Do not modify this line + +# End MULTIPARTICLES # This is TAG. 
Do not modify this line + + + + + +###################################################################### +## PARAM_CARD AUTOMATICALY GENERATED BY MG5 #### +###################################################################### +################################### +## INFORMATION FOR MASS +################################### +BLOCK MASS # + 5 4.700000e+00 # mb + 6 1.730000e+02 # mt + 15 1.777000e+00 # mta + 23 9.118800e+01 # mz + 25 1.250000e+02 # mh + 1 0.000000e+00 # d : 0.0 + 2 0.000000e+00 # u : 0.0 + 3 0.000000e+00 # s : 0.0 + 4 0.000000e+00 # c : 0.0 + 11 0.000000e+00 # e- : 0.0 + 12 0.000000e+00 # ve : 0.0 + 13 0.000000e+00 # mu- : 0.0 + 14 0.000000e+00 # vm : 0.0 + 16 0.000000e+00 # vt : 0.0 + 21 0.000000e+00 # g : 0.0 + 22 0.000000e+00 # a : 0.0 + 24 8.041900e+01 # w+ : cmath.sqrt(mz__exp__2/2. + cmath.sqrt(mz__exp__4/4. - (aew*cmath.pi*mz__exp__2)/(gf*sqrt__2))) +################################### +## INFORMATION FOR SMINPUTS +################################### +BLOCK SMINPUTS # + 1 1.325070e+02 # aewm1 + 2 1.166390e-05 # gf + 3 1.300000e-01 # as (note that parameter not used if you use a pdf set) +################################### +## INFORMATION FOR YUKAWA +################################### +BLOCK YUKAWA # + 5 4.700000e+00 # ymb + 6 1.730000e+02 # ymt + 15 1.777000e+00 # ymtau +################################### +## INFORMATION FOR DECAY +################################### +DECAY 6 1.491500e+00 # wt +DECAY 23 2.441404e+00 # wz +DECAY 24 2.047600e+00 # ww +DECAY 25 6.382339e-03 # wh +DECAY 1 0.000000e+00 # d : 0.0 +DECAY 2 0.000000e+00 # u : 0.0 +DECAY 3 0.000000e+00 # s : 0.0 +DECAY 4 0.000000e+00 # c : 0.0 +DECAY 5 0.000000e+00 # b : 0.0 +DECAY 11 0.000000e+00 # e- : 0.0 +DECAY 12 0.000000e+00 # ve : 0.0 +DECAY 13 0.000000e+00 # mu- : 0.0 +DECAY 14 0.000000e+00 # vm : 0.0 +DECAY 15 0.000000e+00 # ta- : 0.0 +DECAY 16 0.000000e+00 # vt : 0.0 +DECAY 21 0.000000e+00 # g : 0.0 +DECAY 22 0.000000e+00 # a : 0.0 + + +# Number of Events : 100 +# 
Integrated weight (pb) : 66372287.22200001 + +
+ +2212 2212 6.500000e+03 6.500000e+03 0 0 247000 247000 -4 1 +6.637229e+07 1.268397e+06 6.637229e+07 1 +please cite 1405.0301 + + + 5 1 +6.6372287e+07 3.25558900e+01 7.54677100e-03 1.57144200e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.7974513959e+02 2.7974513959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -5.0115268359e+01 5.0115268359e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +1.1282244936e+00 +2.2858622638e+01 +3.8461797268e-02 2.2886480698e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +2.9194898468e+00 -4.2605139346e+01 -2.6389333299e+01 5.0200779193e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 503 -4.0477143403e+00 +1.9746516708e+01 +2.5598074273e+02 2.5677314806e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.32555892E+02 +0 + 1 21 0.43037713E-01 0.32555892E+02 + 1 21 0.77100414E-02 0.32555892E+02 + 0.65037882E+05 + + + + 5 1 +6.6372287e+07 3.05908400e+01 7.54677100e-03 1.59164800e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +4.5729905700e+02 4.5729905700e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -6.3253912877e+02 6.3253912877e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 +1.1420284484e+00 +2.8694844708e+01 +1.2159916921e+02 1.2494421273e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -2.2459074491e+01 -2.0815319355e+01 -6.3010778840e+02 6.3085141876e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 501 0 +2.1317046043e+01 -7.8795253530e+00 +3.3326854742e+02 3.3404255428e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.30590836E+02 +0 + 1 21 0.97313711E-01 0.30590836E+02 + 1 2 0.70353702E-01 0.30590836E+02 + 0.91658669E+02 + + +
diff --git a/tools/REX/2simevs.lhe b/tools/REX/2simevs.lhe new file mode 100644 index 0000000000..6fb52dd403 --- /dev/null +++ b/tools/REX/2simevs.lhe @@ -0,0 +1,407 @@ + +
+ + +3.5.2 + + + t t~ > w+ b w- b~ +output +]]> + + +#********************************************************************* +# MadGraph/MadEvent * +# http://madgraph.hep.uiuc.edu * +# * +# proc_card.dat * +#********************************************************************* +# * +# This Files is generated by MADGRAPH 5 * +# * +# WARNING: This Files is generated for MADEVENT (compatibility issue)* +# This files is NOT a valid MG4 proc_card.dat * +# Running this in MG4 will NEVER reproduce the result of MG5* +# * +#********************************************************************* +#********************************************************************* +# Process(es) requested : mg2 input * +#********************************************************************* +# Begin PROCESS # This is TAG. Do not modify this line +g g > t t~ > w+ b w- b~ #Process +# Be carefull the coupling are here in MG5 convention + +end_coup # End the couplings input + +done # this tells MG there are no more procs +# End PROCESS # This is TAG. Do not modify this line +#********************************************************************* +# Model information * +#********************************************************************* +# Begin MODEL # This is TAG. Do not modify this line +sm +# End MODEL # This is TAG. Do not modify this line +#********************************************************************* +# Start multiparticle definitions * +#********************************************************************* +# Begin MULTIPARTICLES # This is TAG. Do not modify this line + +# End MULTIPARTICLES # This is TAG. Do not modify this line + + + + + +###################################################################### +## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +###################################################################### +## ## +## Width set on Auto will be computed following the information ## +## present in the decay.py files of the model. 
## +## See arXiv:1402.1178 for more details. ## +## ## +###################################################################### + +################################### +## INFORMATION FOR MASS +################################### +Block mass + 5 4.700000e+00 # MB + 6 1.730000e+02 # MT + 15 1.777000e+00 # MTA + 23 9.118800e+01 # MZ + 25 1.250000e+02 # MH +## Dependent parameters, given by model restrictions. +## Those values should be edited following the +## analytical expression. MG5 ignores those values +## but they are important for interfacing the output of MG5 +## to external program such as Pythia. + 1 0.000000e+00 # d : 0.0 + 2 0.000000e+00 # u : 0.0 + 3 0.000000e+00 # s : 0.0 + 4 0.000000e+00 # c : 0.0 + 11 0.000000e+00 # e- : 0.0 + 12 0.000000e+00 # ve : 0.0 + 13 0.000000e+00 # mu- : 0.0 + 14 0.000000e+00 # vm : 0.0 + 16 0.000000e+00 # vt : 0.0 + 21 0.000000e+00 # g : 0.0 + 22 0.000000e+00 # a : 0.0 + 24 8.041900e+01 # w+ : cmath.sqrt(MZ__exp__2/2. + cmath.sqrt(MZ__exp__4/4. - (aEW*cmath.pi*MZ__exp__2)/(Gf*sqrt__2))) + +################################### +## INFORMATION FOR SMINPUTS +################################### +Block sminputs + 1 1.325070e+02 # aEWM1 + 2 1.166390e-05 # Gf + 3 1.180000e-01 # aS (Note that Parameter not used if you use a PDF set) + +################################### +## INFORMATION FOR YUKAWA +################################### +Block yukawa + 5 4.700000e+00 # ymb + 6 1.730000e+02 # ymt + 15 1.777000e+00 # ymtau + +################################### +## INFORMATION FOR DECAY +################################### +DECAY 6 1.491500e+00 # WT +DECAY 23 2.441404e+00 # WZ +DECAY 24 2.047600e+00 # WW +DECAY 25 6.382339e-03 # WH +## Dependent parameters, given by model restrictions. +## Those values should be edited following the +## analytical expression. MG5 ignores those values +## but they are important for interfacing the output of MG5 +## to external program such as Pythia. 
+DECAY 1 0.000000e+00 # d : 0.0 +DECAY 2 0.000000e+00 # u : 0.0 +DECAY 3 0.000000e+00 # s : 0.0 +DECAY 4 0.000000e+00 # c : 0.0 +DECAY 5 0.000000e+00 # b : 0.0 +DECAY 11 0.000000e+00 # e- : 0.0 +DECAY 12 0.000000e+00 # ve : 0.0 +DECAY 13 0.000000e+00 # mu- : 0.0 +DECAY 14 0.000000e+00 # vm : 0.0 +DECAY 15 0.000000e+00 # ta- : 0.0 +DECAY 16 0.000000e+00 # vt : 0.0 +DECAY 21 0.000000e+00 # g : 0.0 +DECAY 22 0.000000e+00 # a : 0.0 + + +# Number of Events : 2 +# Integrated weight (pb) : 439.19338 + +
+ +2212 2212 6.500000e+03 6.500000e+03 0 0 247000 247000 -4 1 +4.391934e+02 3.661122e+00 4.391934e+02 1 +please cite 1405.0301 + + + 8 1 +4.3919338e+02 2.18409400e+02 7.54677100e-03 1.13637100e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.5753848612e+02 1.5753848612e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -4.2590181999e+02 4.2590181999e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 6 2 1 2 501 0 +1.2998184096e+02 -6.4884027876e+00 -2.9537542099e+02 3.6336687781e+02 1.6688759754e+02 0.0000e+00 0.0000e+00 + -6 2 1 2 0 502 -1.2998184096e+02 +6.4884027876e+00 +2.7012087117e+01 2.2007342830e+02 1.7540034961e+02 0.0000e+00 0.0000e+00 + 24 1 3 3 0 0 +5.0317013823e+00 -2.3598693140e+01 -1.3935351491e+02 1.6269245345e+02 8.0419002446e+01 0.0000e+00 -1.0000e+00 + 5 1 3 3 501 0 +1.2495013958e+02 +1.7110290353e+01 -1.5602190608e+02 2.0067442436e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 4 4 0 0 -2.8600069986e+01 +2.4574536812e+01 -3.9691506658e+01 9.7285679922e+01 8.0419002446e+01 0.0000e+00 -1.0000e+00 + -5 1 4 4 0 502 -1.0138177097e+02 -1.8086134024e+01 +6.6703593775e+01 1.2278774838e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.21669541E+03 +0 + 1 21 0.24236690E-01 0.21840939E+03 + 1 21 0.65523357E-01 0.21840939E+03 + 0.33953413E+04 + + + + 8 1 +4.3919338e+02 3.15887700e+02 7.54677100e-03 1.07761700e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +5.3238340901e+02 5.3238340901e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -2.1052403255e+02 2.1052403255e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -6 2 1 2 0 503 +6.6382935030e+01 +2.5548643690e+02 +3.7219413376e+01 3.1807282501e+02 1.7350799341e+02 0.0000e+00 0.0000e+00 + 6 2 1 2 501 0 -6.6382935030e+01 -2.5548643690e+02 +2.8463996308e+02 4.2483461655e+02 1.7258194961e+02 0.0000e+00 0.0000e+00 + 24 1 4 4 0 0 +1.6661005347e-01 -1.8051085807e+02 +2.4998404651e+02 
3.1865880986e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 4 4 501 0 -6.6549545084e+01 -7.4975578835e+01 +3.4655916570e+01 1.0617580669e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 +7.2402758048e+01 +1.6925425433e+02 -3.6729050251e+01 2.0421900271e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 3 3 0 503 -6.0198230171e+00 +8.6232182571e+01 +7.3948463627e+01 1.1385382230e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.31576070E+03 +0 + 1 21 0.81905139E-01 0.31588770E+03 + 1 21 0.32388313E-01 0.31588770E+03 + 0.11189986E+04 + + +
diff --git a/tools/REX/REX.hpp b/tools/REX/REX.hpp index c97f3e6a27..bbf7596c2f 100644 --- a/tools/REX/REX.hpp +++ b/tools/REX/REX.hpp @@ -1294,10 +1294,10 @@ namespace REX return content; } lhePrt(){ return; } - lhePrt( std::pair prtInfo ){ - status = std::to_string( prtInfo.first ); - pdg = std::to_string( prtInfo.second ); - } + // lhePrt( std::pair prtInfo ){ + // status = std::to_string( prtInfo.first ); + // pdg = std::to_string( prtInfo.second ); + // } lhePrt( std::pair& prtInfo ){ status = std::to_string( prtInfo.first ); pdg = std::to_string( prtInfo.second ); @@ -1378,9 +1378,9 @@ namespace REX return modStat; } event(){ return; } - event( std::vector> prtInfo ){ + event( std::vector>& prtInfo ){ header.setNprt( std::to_string( prtInfo.size() ) ); - for( auto prt : prtInfo ){ + for( auto& prt : prtInfo ){ prts.push_back( std::make_shared( prt ) ); } } @@ -3447,15 +3447,14 @@ namespace REX template std::shared_ptr> vectorFlat( std::vector>> vecVec ) { - if( vecVec.size() == relProcs.size() ) continue; - else throw std::range_error("vectorFlat: input vector size does not match number of subprocesses"); + if( vecVec.size() != relProcs.size() ) throw std::range_error("vectorFlat: input vector size does not match number of subprocesses"); for( size_t k = 0 ; k < vecVec.size() ; ++k){ if( vecVec[k]->size() == relProcs[k]->size() ) continue; else throw std::range_error("vectorFlat: input vector size does not match number of events for subprocess"); } auto flatVec = std::make_shared>(relProcs[0]->size()); for( size_t k = 0 ; k < relProcs.size() ; ++k ){ - currInd = 0; + size_t currInd = 0; for( size_t j = 0 ; j < relProcs[k]->size() ; ++j ){ if( relProcs[k]->at(j) ){ flatVec->at(currInd) = vecVec[k]->at(currInd); diff --git a/tools/REX/rwgt_driver.cc b/tools/REX/rwgt_driver.cc index 57838a2dd0..f4c6ab927f 100644 --- a/tools/REX/rwgt_driver.cc +++ b/tools/REX/rwgt_driver.cc @@ -15,7 +15,7 @@ #include "rwgt_instance.h" #include #include -//%(include_lines)s 
+%(include_lines)s int usage( char* argv0, int ret = 1 ) { @@ -89,7 +89,6 @@ int main( int argc, char** argv ){ // ZW : include rwgt_instances(s) -//%(rwgt_runners)s std::vector runSet = {%(run_set)s}; std::vector runSet; diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index a770bf69aa..689daabcdd 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -95,8 +95,8 @@ namespace %(process_namespace)s{ }; std::shared_ptr> thisProcSort( std::string_view& status, std::vector& arguments ){ - std::vector initPrts = %(init_prt_ids)s - std::vector finPrts = %(fin_prt_ids)s + std::vector initPrts = %(init_prt_ids)s; + std::vector finPrts = %(fin_prt_ids)s; // std::vector initPrts = {"-1"}; // std::vector finPrts = {"1"}; if( status == "-1" ){ diff --git a/tools/REX/teawREX.hpp b/tools/REX/teawREX.hpp index e6b2c5f1e3..2c3c7ec7d1 100644 --- a/tools/REX/teawREX.hpp +++ b/tools/REX/teawREX.hpp @@ -450,7 +450,7 @@ namespace REX::teaw initMEs = {}; for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) { - auto ins = meEvals[eventFile.subProcs[k]]( *(momenta[k]), *(gS[k]) ); + auto ins = meEvals[*(eventFile.subProcs[k]->process)]( *(momenta[k]), *(gS[k]) ); initMEs.push_back( std::make_shared>( ins->begin(), ins->begin() + wgts[k]->size() ) ); } //auto ins = meEval( *(momenta[0]), *(gS[0]) ); @@ -487,7 +487,7 @@ namespace REX::teaw } template void setNormWgts(Args&&... args){ - if( !oneME() ){ setMEs(args); } + if( !oneME() ){ setMEs(args...); } //if( initMEs->size() != wgts[0]->size() ) // throw std::runtime_error( "Inconsistent number of events and event weights." 
); for( auto k = 0; k < initMEs.size() ; ++k ){ diff --git a/tools/REX/tester.cpp b/tools/REX/tester.cpp new file mode 100644 index 0000000000..d7d8493c25 --- /dev/null +++ b/tools/REX/tester.cpp @@ -0,0 +1,62 @@ +#include "teawREX.hpp" +#include +#include + +std::shared_ptr> meEval( std::vector& x, std::vector& y){ + int random = rand() % 10; + if( random == 0 ){ random = 11; } + auto thisIsIt = std::make_shared>( y.size(), random ); + return thisIsIt; +} + +std::shared_ptr> sortFunc(std::vector arguments){ + return REX::stoiSort(arguments); +} + +std::shared_ptr> sorterFunc(std::string_view dummy, std::vector arguments){ + return REX::stoiSort(arguments); +} + +int main( int argc, char* argv[] ){ + std::string lheFilePath; + + // READ COMMAND LINE ARGUMENTS + for( int arg = 0; arg < argc; arg++ ) + { + auto currArg = std::string( argv[arg] ); + if( currArg.substr(0,9) == "--lhefile" || currArg.substr(0,4) == "-lhe" ) + { + lheFilePath = currArg.substr( currArg.find( "=" ) + 1 ); + } + } + + + std::string currPath = argv[0]; + auto sembler = std::function>(std::vector)>(sortFunc); + auto sembler2 = std::function>(std::string_view, std::vector)>(sorterFunc); + auto lheFile = REX::filePuller(lheFilePath); + //std::cout << lheFile->substr(0, 1) << "\n"; + //std::cout << bool(lheFile->compare(0, 1, "<")) << "\n"; + //std::cout << lheFile->substr(1968, 1999 - 1968) << "\n"; + auto parseLhe = REX::lheNode(*lheFile); + //std::cout << *parseLhe.nodeWriter() << "\n"; + auto treeMan = parseLhe.getTree(); + //std::cout << parseLhe.getChildren().size() << " & " << parseLhe.getEvents().size() << " & " << treeMan.getChildren()->size() << "\n"; + auto proceses = REX::lheReOrder(parseLhe, {"-1", "1", "2"} ); + auto processes2 = REX::lheEvReOrder(parseLhe, {"-1", "1", "2"} ); + //std::cout << proceses.size() << " & " << processes2.size() << "\n"; + bool comp = REX::evProcComp( *parseLhe.getEvents()[0], *parseLhe.getEvents()[1], {"-1", "1"} ); + if( comp ){ std::cout << "true\n"; } 
+ else{ std::cout << "false\n"; } + auto evlist = REX::evProcessPull( parseLhe, {"-1", "1"} ); + //auto evsVals = lheValDoubles(parseLhe); + auto evsVals = lheValDoubles(parseLhe, sembler2); + int siz = 0; + for( auto& ev : *evsVals ){ + siz += ev->size(); + } + std::cout << evsVals->size() << "\n"; + std::cout << siz << "\n"; + return 0; + +} \ No newline at end of file diff --git a/tools/REX/unweighted_events.lhe b/tools/REX/unweighted_events.lhe new file mode 100644 index 0000000000..6b05b56584 --- /dev/null +++ b/tools/REX/unweighted_events.lhe @@ -0,0 +1,1870 @@ + +
+ + +3.5.2 + + + 3j +output +]]> + + +#********************************************************************* +# MadGraph/MadEvent * +# http://madgraph.hep.uiuc.edu * +# * +# proc_card.dat * +#********************************************************************* +# * +# This Files is generated by MADGRAPH 5 * +# * +# WARNING: This Files is generated for MADEVENT (compatibility issue)* +# This files is NOT a valid MG4 proc_card.dat * +# Running this in MG4 will NEVER reproduce the result of MG5* +# * +#********************************************************************* +#********************************************************************* +# Process(es) requested : mg2 input * +#********************************************************************* +# Begin PROCESS # This is TAG. Do not modify this line +p p > 3j #Process +# Be carefull the coupling are here in MG5 convention + +end_coup # End the couplings input + +done # this tells MG there are no more procs +# End PROCESS # This is TAG. Do not modify this line +#********************************************************************* +# Model information * +#********************************************************************* +# Begin MODEL # This is TAG. Do not modify this line +sm +# End MODEL # This is TAG. Do not modify this line +#********************************************************************* +# Start multiparticle definitions * +#********************************************************************* +# Begin MULTIPARTICLES # This is TAG. Do not modify this line + +# End MULTIPARTICLES # This is TAG. 
Do not modify this line + + + + + +###################################################################### +## PARAM_CARD AUTOMATICALY GENERATED BY MG5 #### +###################################################################### +################################### +## INFORMATION FOR MASS +################################### +BLOCK MASS # + 5 4.700000e+00 # mb + 6 1.730000e+02 # mt + 15 1.777000e+00 # mta + 23 9.118800e+01 # mz + 25 1.250000e+02 # mh + 1 0.000000e+00 # d : 0.0 + 2 0.000000e+00 # u : 0.0 + 3 0.000000e+00 # s : 0.0 + 4 0.000000e+00 # c : 0.0 + 11 0.000000e+00 # e- : 0.0 + 12 0.000000e+00 # ve : 0.0 + 13 0.000000e+00 # mu- : 0.0 + 14 0.000000e+00 # vm : 0.0 + 16 0.000000e+00 # vt : 0.0 + 21 0.000000e+00 # g : 0.0 + 22 0.000000e+00 # a : 0.0 + 24 8.041900e+01 # w+ : cmath.sqrt(mz__exp__2/2. + cmath.sqrt(mz__exp__4/4. - (aew*cmath.pi*mz__exp__2)/(gf*sqrt__2))) +################################### +## INFORMATION FOR SMINPUTS +################################### +BLOCK SMINPUTS # + 1 1.325070e+02 # aewm1 + 2 1.166390e-05 # gf + 3 1.300000e-01 # as (note that parameter not used if you use a pdf set) +################################### +## INFORMATION FOR YUKAWA +################################### +BLOCK YUKAWA # + 5 4.700000e+00 # ymb + 6 1.730000e+02 # ymt + 15 1.777000e+00 # ymtau +################################### +## INFORMATION FOR DECAY +################################### +DECAY 6 1.491500e+00 # wt +DECAY 23 2.441404e+00 # wz +DECAY 24 2.047600e+00 # ww +DECAY 25 6.382339e-03 # wh +DECAY 1 0.000000e+00 # d : 0.0 +DECAY 2 0.000000e+00 # u : 0.0 +DECAY 3 0.000000e+00 # s : 0.0 +DECAY 4 0.000000e+00 # c : 0.0 +DECAY 5 0.000000e+00 # b : 0.0 +DECAY 11 0.000000e+00 # e- : 0.0 +DECAY 12 0.000000e+00 # ve : 0.0 +DECAY 13 0.000000e+00 # mu- : 0.0 +DECAY 14 0.000000e+00 # vm : 0.0 +DECAY 15 0.000000e+00 # ta- : 0.0 +DECAY 16 0.000000e+00 # vt : 0.0 +DECAY 21 0.000000e+00 # g : 0.0 +DECAY 22 0.000000e+00 # a : 0.0 + + +# Number of Events : 100 +# 
Integrated weight (pb) : 66372287.22200001 + +
+ +2212 2212 6.500000e+03 6.500000e+03 0 0 247000 247000 -4 1 +6.637229e+07 1.268397e+06 6.637229e+07 1 +please cite 1405.0301 + + + 5 1 +6.6372287e+07 4.60140800e+01 7.54677100e-03 1.46810800e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.8788806474e+02 1.8788806474e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -3.0556910363e+01 3.0556910363e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -1.0298827890e+01 -4.1053633424e+01 +8.3051244550e+01 9.3214676391e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 +1.7276524932e+01 -1.2156784273e+01 -1.1495329061e+01 2.4050120744e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 -6.9776970424e+00 +5.3210417698e+01 +8.5775238884e+01 1.0118017797e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.46014081E+02 +0 + 1 21 0.28905856E-01 0.46014081E+02 + 1 21 0.47010632E-02 0.46014081E+02 + 0.31830845E+06 + + + + 5 1 +6.6372287e+07 3.25558900e+01 7.54677100e-03 1.57144200e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.7974513959e+02 2.7974513959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -5.0115268359e+01 5.0115268359e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +1.1282244936e+00 +2.2858622638e+01 +3.8461797268e-02 2.2886480698e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +2.9194898468e+00 -4.2605139346e+01 -2.6389333299e+01 5.0200779193e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 503 -4.0477143403e+00 +1.9746516708e+01 +2.5598074273e+02 2.5677314806e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.32555892E+02 +0 + 1 21 0.43037713E-01 0.32555892E+02 + 1 21 0.77100414E-02 0.32555892E+02 + 0.65037882E+05 + + + + 5 1 +6.6372287e+07 3.05908400e+01 7.54677100e-03 1.59164800e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +4.5729905700e+02 4.5729905700e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 
+ 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -6.3253912877e+02 6.3253912877e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 +1.1420284484e+00 +2.8694844708e+01 +1.2159916921e+02 1.2494421273e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -2.2459074491e+01 -2.0815319355e+01 -6.3010778840e+02 6.3085141876e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 501 0 +2.1317046043e+01 -7.8795253530e+00 +3.3326854742e+02 3.3404255428e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.30590836E+02 +0 + 1 21 0.97313711E-01 0.30590836E+02 + 1 2 0.70353702E-01 0.30590836E+02 + 0.91658669E+02 + + + + 5 1 +6.6372287e+07 1.24970000e+02 7.54677100e-03 1.23511600e-01 + 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +6.4054339688e+02 6.4054339688e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -3.3928351011e+01 3.3928351011e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 505 -3.6546574781e+01 +7.3293152180e+00 +5.3085336864e+01 6.4864658942e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 503 -8.4789922053e+01 +1.0871076160e+01 +6.9212770934e+01 1.0999053977e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 +1.2133649683e+02 -1.8200391378e+01 +4.8431693807e+02 4.9961654918e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.12497005E+03 +0 + 1 21 0.98545129E-01 0.12497005E+03 + 1 21 0.52197468E-02 0.12497005E+03 + 0.21698561E+05 + + + + 5 1 +6.6372287e+07 2.09917500e+01 7.54677100e-03 1.72629600e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.9393491974e+01 2.9393491974e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -5.0612634540e+01 5.0612634540e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -2.2361493101e+01 -8.0134576492e+00 -2.5339678876e+01 3.4732566890e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 +5.3440837509e+00 +2.0304167068e+01 
+1.0307030697e+01 2.3389170854e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 503 +1.7017409350e+01 -1.2290709419e+01 -6.1864943863e+00 2.1884388769e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.20991755E+02 +0 + 1 21 0.45220758E-02 0.20991755E+02 + 1 21 0.77865590E-02 0.20991755E+02 + 0.28846636E+07 + + + + 5 1 +6.6372287e+07 2.01883800e+01 7.54677100e-03 1.74160800e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.9616331394e+01 2.9616331394e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -2.1809592212e+02 2.1809592212e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +1.4332017667e+01 -1.5898231494e+01 -1.1283261663e+02 1.1484493837e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 503 +5.5127621513e+00 +1.9607065270e+01 -9.9531289229e+01 1.0159382408e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 -1.9844779818e+01 -3.7088337755e+00 +2.3884315130e+01 3.1273491063e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.20188381E+02 +0 + 1 21 0.45563588E-02 0.20188381E+02 + 1 21 0.33553218E-01 0.20188381E+02 + 0.23199633E+06 + + + + 5 1 +6.6372287e+07 2.83114100e+01 7.54677100e-03 1.61754100e-01 + -3 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +2.8261154183e+01 2.8261154183e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -6.5070264344e+01 6.5070264344e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +7.3144338996e+00 +3.7539358060e+01 -8.3663539266e+00 3.9149715515e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -1.5538451858e+01 -1.6013356486e+01 -1.8894895213e+01 2.9238470159e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -3 1 1 2 0 504 +8.2240179584e+00 -2.1526001574e+01 -9.5478610208e+00 2.4943232854e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.28311412E+02 +0 + 1 21 0.10010810E-01 0.28311412E+02 + 1 -3 0.43478699E-02 0.28311412E+02 + 0.75606750E+05 + + + + 5 1 
+6.6372287e+07 2.50484100e+01 7.54677100e-03 1.66030800e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +4.5278855952e+02 4.5278855952e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.3454632319e+00 3.3454632319e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 505 -1.5102249073e+01 -2.7392413109e+01 +1.7894067235e+02 1.8165402953e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 +2.1057931191e+01 +3.9670307239e+00 +9.9776507011e+01 1.0205158083e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 -5.9556821180e+00 +2.3425382385e+01 +1.7072591693e+02 1.7242841240e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.25048406E+02 +0 + 1 21 0.69659730E-01 0.25048406E+02 + 1 21 0.51468701E-03 0.25048406E+02 + 0.16161844E+07 + + + + 5 1 +6.6372287e+07 6.54738600e+01 7.54677100e-03 1.37619800e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +2.6663794394e+01 2.6663794394e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -2.5265738923e+02 2.5265738923e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 -7.6137868107e+00 +4.2439462980e+01 -1.6255497692e+02 1.6817609310e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -1.7587948234e+01 +1.0621679064e+01 -4.5177420050e+01 4.9630185085e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 +2.5201735045e+01 -5.3061142044e+01 -1.8261197867e+01 6.1514905444e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.65473858E+02 +0 + 1 21 0.41021221E-02 0.65473858E+02 + 1 1 0.38870368E-01 0.65473858E+02 + 0.41073273E+05 + + + + 5 1 +6.6372287e+07 4.71053000e+01 7.54677100e-03 1.46161100e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.0723487937e+02 1.0723487937e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.2434583342e+02 1.2434583342e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 
504 -1.5074548460e+01 +4.4668996332e+01 +7.0907382043e+01 8.5149386802e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 -3.7446327852e+01 -2.8577640944e+01 -7.7213750461e+01 9.0448174619e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 +5.2520876312e+01 -1.6091355388e+01 -1.0804585631e+01 5.5983151371e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.47105297E+02 +0 + 1 21 0.16497674E-01 0.47105297E+02 + 1 21 0.19130128E-01 0.47105297E+02 + 0.81247298E+05 + + + + 5 1 +6.6372287e+07 4.77488600e+01 7.54677100e-03 1.45787600e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +1.3351097238e+02 1.3351097238e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -2.1959914093e+03 2.1959914093e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -2.6534412892e+01 +2.0887502154e+01 -2.0204850067e+03 2.0207671872e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +2.7723341226e+01 -6.8071401227e+01 -1.6302498162e+02 1.7882797305e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 504 -1.1889283334e+00 +4.7183899072e+01 +1.2102955134e+02 1.2990722143e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.47748865E+02 +0 + 1 21 0.20540149E-01 0.47748865E+02 + 1 21 0.33784484E+00 0.47748865E+02 + 0.69049208E+02 + + + + 5 1 +6.6372287e+07 5.17648700e+01 7.54677100e-03 1.43604800e-01 + -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +7.0572435077e+02 7.0572435077e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -8.6069634546e+00 8.6069634546e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +2.3775888591e+01 -5.0832360721e+00 +1.9988986553e+01 3.1475256166e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +2.1064391322e+01 -6.4983212153e+00 +5.8809348241e+01 6.2805065090e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -2 1 1 2 0 502 -4.4840279913e+01 +1.1581557287e+01 +6.1831905252e+02 
6.2005099297e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.51764867E+02 +0 + 1 21 0.13241488E-02 0.51764867E+02 + 1 -2 0.10857293E+00 0.51764867E+02 + 0.12387408E+05 + + + + 5 1 +6.6372287e+07 2.68215700e+01 7.54677100e-03 1.63613700e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +4.7995183998e+01 4.7995183998e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -4.6755507222e+02 4.6755507222e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 -5.6572765501e+00 -2.1816941248e+01 +1.1968309353e+01 2.5519093482e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -2.3663698968e+01 +1.2778475361e+01 -4.5222293332e+02 4.5302189959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 503 +2.9320975518e+01 +9.0384658865e+00 +2.0694735751e+01 3.7009263148e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.26821571E+02 +0 + 1 21 0.73838749E-02 0.26821571E+02 + 1 21 0.71931546E-01 0.26821571E+02 + 0.24837378E+05 + + + + 5 1 +6.6372287e+07 4.31543000e+01 7.54677100e-03 1.48620400e-01 + 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +7.1563261884e+02 7.1563261884e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.4644760996e+01 1.4644760996e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +1.2645094462e+01 -4.3267730831e+01 +6.6587442685e+02 6.6739849211e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 505 -1.7724111495e+01 +1.5186816241e+01 +1.2556432303e+01 2.6503726304e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 503 +5.0790170329e+00 +2.8080914591e+01 +2.2556998696e+01 3.6375161422e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.43154296E+02 +0 + 1 21 0.11009730E+00 0.43154296E+02 + 1 21 0.22530407E-02 0.43154296E+02 + 0.66560154E+05 + + + + 5 1 +6.6372287e+07 4.37774800e+01 7.54677100e-03 1.48212100e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.6494856549e+02 
2.6494856549e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -4.4840936233e+01 4.4840936233e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 -2.3919572741e+01 -2.1836315356e+01 -2.8033165864e+01 4.2834904188e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +2.0968394452e+01 -2.8904789122e+01 +4.4445065176e+01 5.7013368771e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 +2.9511782884e+00 +5.0741104479e+01 +2.0369572994e+02 2.0994122876e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.43777479E+02 +0 + 1 21 0.40761317E-01 0.43777479E+02 + 1 21 0.68986058E-02 0.43777479E+02 + 0.88070658E+05 + + + + 5 1 +6.6372287e+07 3.19042100e+01 7.54677100e-03 1.57794600e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +3.5412150098e+01 3.5412150098e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -7.5668427371e+02 7.5668427371e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 +4.2078004819e+00 -3.1373137318e+01 -7.1649764593e+02 7.1719652534e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 +1.7730843460e+01 -1.5141321578e+01 -1.1865670592e+00 2.3346313849e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 503 -2.1938643942e+01 +4.6514458896e+01 -3.5879106177e+00 5.1553584618e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.31904206E+02 +0 + 1 21 0.54480234E-02 0.31904206E+02 + 1 21 0.11641296E+00 0.31904206E+02 + 0.14302972E+05 + + + + 5 1 +6.6372287e+07 2.62752100e+01 7.54677100e-03 1.64333300e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +7.3680834147e+01 7.3680834147e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -4.3076387169e+02 4.3076387169e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +5.3624128570e+00 +1.9370699338e+01 -1.8582414279e+02 1.8690797605e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 
504 502 +1.5600742238e+01 -2.1293512835e+01 +7.0284358620e+01 7.5077878991e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -2.0963155095e+01 +1.9228134974e+00 -2.4154325337e+02 2.4245885080e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.26275207E+02 +0 + 1 21 0.11335513E-01 0.26275207E+02 + 1 2 0.66271366E-01 0.26275207E+02 + 0.56080712E+04 + + + + 5 1 +6.6372287e+07 4.55308900e+01 7.54677100e-03 1.47105400e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +2.1459625930e+03 2.1459625930e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -5.5418446222e+00 5.5418446222e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +3.4560110742e+01 -1.7822362191e+01 +1.6729667012e+02 1.7175626242e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +8.6373215770e+00 +3.7927160061e+01 +1.2285930833e+02 1.2887002149e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -4.3197432319e+01 -2.0104797870e+01 +1.8502647699e+03 1.8508781537e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.45530891E+02 +0 + 1 21 0.85259319E-03 0.45530891E+02 + 1 2 0.33014743E+00 0.45530891E+02 + 0.37889394E+05 + + + + 5 1 +6.6372287e+07 3.49649300e+01 7.54677100e-03 1.54891200e-01 + -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +8.0100600886e+00 8.0100600886e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 -1 0 0 503 0 +0.0000000000e+00 -0.0000000000e+00 -1.6748462249e+03 1.6748462249e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 -2.8388621465e+01 +1.5105638110e+01 -1.9595061691e+02 1.9857174623e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 502 0 +5.9035355306e+00 -3.7141587409e+01 -1.4389449039e+03 1.4394362736e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -2 1 1 2 0 502 +2.2485085934e+01 +2.2035949299e+01 -3.1940644021e+01 4.4848265200e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.34964932E+02 +0 + 1 2 0.25766864E+00 0.34964932E+02 + 1 -2 0.12323170E-02 0.34964932E+02 + 
0.15263237E+04 + + + + 5 1 +6.6372287e+07 3.04072400e+01 7.54677100e-03 1.59363000e-01 + 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +2.3031354025e+01 2.3031354025e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.8224559169e+02 1.8224559169e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +2.5754317368e+01 -4.7408923451e+01 -9.5689678327e+01 1.0985174293e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -1.1009373966e+01 +2.4882397341e+01 -8.6110655342e-01 2.7222812438e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 503 -1.4744943402e+01 +2.2526526110e+01 -6.2663452789e+01 6.8202390354e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.30407236E+02 +0 + 1 21 0.35432851E-02 0.30407236E+02 + 1 21 0.28037785E-01 0.30407236E+02 + 0.50703811E+06 + + + + 5 1 +6.6372287e+07 2.46316000e+01 7.54677100e-03 1.66635000e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +8.0590561410e+01 8.0590561410e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -6.2809489236e+02 6.2809489236e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -1.7815991771e+01 +2.0155583443e+01 -8.5520591269e+00 2.8227554305e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 -7.5126314979e+00 -1.9813252642e+01 -6.0724710539e+02 6.0761669795e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 503 +2.5328623269e+01 -3.4233080119e-01 +6.8294833568e+01 7.2841201522e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.24631597E+02 +0 + 1 21 0.96629979E-01 0.24631597E+02 + 1 -1 0.12398548E-01 0.24631597E+02 + 0.37172940E+03 + + + + 5 1 +6.6372287e+07 2.48386400e+01 7.54677100e-03 1.66333000e-01 + 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +3.4621419117e+02 3.4621419117e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -1.8471995540e+01 1.8471995540e+01 0.0000000000e+00 
0.0000e+00 -1.0000e+00 + 21 1 1 2 504 503 +2.4946999999e+01 +1.5290445725e+00 +2.8662250343e+02 2.8771018449e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 501 0 -1.3234624373e+00 -2.4231607655e+01 -1.6739991815e-01 2.4268300005e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -2 1 1 2 0 502 -2.3623537562e+01 +2.2702563083e+01 +4.1287092120e+01 5.2707702219e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.24838643E+02 +0 + 1 21 0.53263717E-01 0.24838643E+02 + 1 21 0.28418457E-02 0.24838643E+02 + 0.20767655E+06 + + + + 5 1 +6.6372287e+07 3.39483100e+01 7.54677100e-03 1.55814300e-01 + -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +2.5448573077e+01 2.5448573077e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 -1 0 0 503 0 +0.0000000000e+00 -0.0000000000e+00 -1.0820396951e+03 1.0820396951e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +4.5361883356e+01 +2.5711927708e+01 -5.3689272592e+02 5.3941876389e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 502 0 -1.6783464189e+01 -2.3932337766e+01 -5.3486766152e+02 5.3566580701e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -2 1 1 2 0 502 -2.8578419167e+01 -1.7795899428e+00 +1.5169265433e+01 3.2403697261e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.33948308E+02 +0 + 1 2 0.16646767E+00 0.33948308E+02 + 1 -2 0.39151646E-02 0.33948308E+02 + 0.59650818E+03 + + + + 5 1 +6.6372287e+07 4.00572800e+01 7.54677100e-03 1.50779000e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +3.2066229463e+01 3.2066229463e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.0083738526e+02 3.0083738526e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 +3.4238964599e+01 -3.1475020468e+00 -2.1121471239e+02 2.1399501909e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 -1.6612873637e+01 +3.9603631259e+01 +6.1874354643e+00 4.3390316167e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 502 -1.7626090962e+01 -3.6456129213e+01 
-6.3743878874e+01 7.5518279467e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.40057279E+02 +0 + 1 21 0.49332658E-02 0.40057279E+02 + 1 21 0.46282677E-01 0.40057279E+02 + 0.11855536E+06 + + + + 5 1 +6.6372287e+07 4.37051900e+01 7.54677100e-03 1.48259100e-01 + 2 -1 0 0 502 0 +0.0000000000e+00 +0.0000000000e+00 +1.7110304904e+03 1.7110304904e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -2.0783382913e+01 2.0783382913e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -1.4418127206e+01 +2.0747890384e+01 +7.9570356529e+01 8.3485321978e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 +3.6111610870e+01 -3.0183395268e+01 +1.6215922235e+03 1.6222750769e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 1 1 2 502 0 -2.1693483664e+01 +9.4355048838e+00 -1.0915472595e+01 2.6053474392e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.43705192E+02 +0 + 1 2 0.26323535E+00 0.43705192E+02 + 1 1 0.31974449E-02 0.43705192E+02 + 0.43186860E+03 + + + + 5 1 +6.6372287e+07 3.25233300e+01 7.54677100e-03 1.57176200e-01 + 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +2.1530668898e+01 2.1530668898e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -5.8911499310e+02 5.8911499310e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 +3.1914103860e+01 -3.6894354070e+01 -1.3312376381e+02 1.4178025208e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 -1.0765125773e+01 +2.9189748902e+01 +2.6051922163e+00 3.1220448433e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -2.1148978087e+01 +7.7046051684e+00 -4.3706575261e+02 4.3764496149e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.32523330E+02 +0 + 1 21 0.33124105E-02 0.32523330E+02 + 1 2 0.90633079E-01 0.32523330E+02 + 0.30633976E+05 + + + + 5 1 +6.6372287e+07 3.61852100e+01 7.54677100e-03 1.53832100e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +4.3002025114e+01 
4.3002025114e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.3404139915e+02 3.3404139915e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 505 +5.4545664238e+00 -2.1319807632e+01 -2.9481524350e+02 2.9563544153e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +3.1100319594e+01 -8.7134369374e+00 +9.7127797801e+00 3.3726724614e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 -3.6554886018e+01 +3.0033244569e+01 -5.9369103163e+00 4.7681258113e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.36185209E+02 +0 + 1 21 0.66156963E-02 0.36185209E+02 + 1 21 0.51390983E-01 0.36185209E+02 + 0.59410236E+05 + + + + 5 1 +6.6372287e+07 2.68952500e+01 7.54677100e-03 1.63518300e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +2.8770752959e+02 2.8770752959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -2.6931152162e+02 2.6931152162e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +2.0961573832e+01 +2.3688081609e+00 -1.7971774862e+02 1.8095156257e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 505 -3.7349663467e+00 -2.3130388947e+01 -8.6786083310e+01 8.9893209547e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 -1.7226607485e+01 +2.0761580786e+01 +2.8489983990e+02 2.8617427909e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.26895249E+02 +0 + 1 21 0.44262697E-01 0.26895249E+02 + 1 21 0.41432541E-01 0.26895249E+02 + 0.32158164E+04 + + + + 5 1 +6.6372287e+07 2.51016900e+01 7.54677100e-03 1.65954600e-01 + 2 -1 0 0 504 0 -0.0000000000e+00 +0.0000000000e+00 +6.4617848855e+01 6.4617848855e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -4.5852280566e+01 4.5852280566e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +1.2633388858e+01 +1.7296317379e+01 -2.9732559349e+01 3.6644101767e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 
503 +1.3130515800e+01 -2.4553339855e+01 +1.7570358035e+01 3.2924070597e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -2.5763904658e+01 +7.2570224764e+00 +3.0927769604e+01 4.0901957057e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.25101687E+02 +0 + 1 21 0.70541970E-02 0.25101687E+02 + 1 2 0.99412075E-02 0.25101687E+02 + 0.89083039E+05 + + + + 5 1 +6.6372287e+07 2.65415900e+01 7.54677100e-03 1.63979800e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +7.3037786153e+01 7.3037786153e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -8.2017257442e+01 8.2017257442e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 505 +1.8697084487e+01 -8.2924898880e+00 -5.9625078565e+01 6.3035675222e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +6.9951105287e+00 +2.5911255642e+01 +5.4380851637e+01 6.0643233464e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 -2.5692195015e+01 -1.7618765754e+01 -3.7352443596e+00 3.1376134909e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.26541591E+02 +0 + 1 21 0.11236582E-01 0.26541591E+02 + 1 21 0.12618040E-01 0.26541591E+02 + 0.30903565E+06 + + + + 5 1 +6.6372287e+07 2.27761200e+01 7.54677100e-03 1.69516500e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +8.6173848945e+01 8.6173848945e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -6.3585034087e+01 6.3585034087e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -2.7032116927e+01 +8.2973252626e+00 +1.6307827832e+01 3.2642398819e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +1.9247818195e+01 +1.2926007751e+01 -5.0735466398e+01 5.5782145282e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 503 +7.7842987316e+00 -2.1223333014e+01 +5.7016453425e+01 6.1334338931e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.22776118E+02 +0 + 1 21 0.13257515E-01 0.22776118E+02 + 1 21 0.97823130E-02 
0.22776118E+02 + 0.35046139E+06 + + + + 5 1 +6.6372287e+07 3.80456700e+01 7.54677100e-03 1.52310600e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.3757684306e+01 1.3757684306e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.9694458511e+02 3.9694458511e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 505 -1.4438846203e+01 -1.7702498483e+01 -8.3619869477e+01 8.6684146783e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +1.4547368666e+01 -2.2261281669e+01 -2.7669903846e+02 2.7797400684e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 -1.0852246356e-01 +3.9963780152e+01 -2.2867992874e+01 4.6044115794e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.38045671E+02 +0 + 1 21 0.21165667E-02 0.38045671E+02 + 1 21 0.61068400E-01 0.38045671E+02 + 0.26069372E+06 + + + + 5 1 +6.6372287e+07 4.59126200e+01 7.54677100e-03 1.46872300e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +2.0311609080e+03 2.0311609080e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -2.4721053331e+01 2.4721053331e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +5.4574055215e+00 +4.5277462040e+01 +1.8656515221e+03 1.8662088398e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +3.1344441190e+01 +1.9716704689e+01 +9.5251287709e+01 1.0219603832e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 -3.6801846712e+01 -6.4994166729e+01 +4.5537044828e+01 8.7477083181e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.45912622E+02 +0 + 1 21 0.31248616E+00 0.45912622E+02 + 1 21 0.38032406E-02 0.45912622E+02 + 0.16431983E+04 + + + + 5 1 +6.6372287e+07 3.23160000e+01 7.54677100e-03 1.57381400e-01 + 1 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +5.8873030751e+02 5.8873030751e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -2.0960881505e+01 2.0960881505e+01 
0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 +1.2271827274e+01 +2.2294515262e+01 +1.4647225131e+02 1.4866661885e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +2.0219290079e+01 -9.7210766525e-02 -1.3589093295e+01 2.4361703508e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 1 1 2 501 0 -3.2491117354e+01 -2.2197304495e+01 +4.3488626799e+02 4.3666286666e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.32316004E+02 +0 + 1 21 0.32247515E-02 0.32316004E+02 + 1 1 0.90573881E-01 0.32316004E+02 + 0.20132875E+05 + + + + 5 1 +6.6372287e+07 4.71162200e+01 7.54677100e-03 1.46154700e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +3.5698751231e+01 3.5698751231e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.5024197302e+02 1.5024197302e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 505 -1.8393329399e+01 -2.0730266036e+01 +1.5134275211e+01 3.1576966011e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +3.0318818959e+01 +4.1981229081e+01 -1.1447233348e+02 1.2564063639e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 -1.1925489561e+01 -2.1250963045e+01 -1.5205163517e+01 2.8723121856e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.47116224E+02 +0 + 1 21 0.54921154E-02 0.47116224E+02 + 1 21 0.23114150E-01 0.47116224E+02 + 0.37322948E+06 + + + + 5 1 +6.6372287e+07 5.00477800e+01 7.54677100e-03 1.44508400e-01 + 2 -1 0 0 503 0 +0.0000000000e+00 +0.0000000000e+00 +3.8911714874e+02 3.8911714874e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 502 0 -0.0000000000e+00 -0.0000000000e+00 -3.0237812812e+02 3.0237812812e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +2.7035206944e+01 +1.1996257553e+01 +1.2886486117e+02 1.3221560064e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -2.2748413249e+01 -4.4788353833e+01 -2.9753120134e+02 3.0174211293e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 502 0 -4.2867936948e+00 
+3.2792096280e+01 +2.5540536079e+02 2.5753756328e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.50047775E+02 +0 + 1 2 0.59864176E-01 0.50047775E+02 + 1 2 0.46519712E-01 0.50047775E+02 + 0.13730376E+03 + + + + 5 1 +6.6372287e+07 2.97093000e+01 7.54677100e-03 1.60132400e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.1854806368e+02 1.1854806368e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -2.8789738375e+02 2.8789738375e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -2.1808462259e+01 +1.9091314842e+01 +2.7760747717e+01 4.0134105724e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +5.0913227713e+01 -2.3763320633e+01 +7.4385366719e+01 9.3220356927e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 -2.9104765455e+01 +4.6720057908e+00 -2.7149543450e+02 2.7309098477e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.29709305E+02 +0 + 1 21 0.18238164E-01 0.29709305E+02 + 1 1 0.44291905E-01 0.29709305E+02 + 0.26942502E+04 + + + + 5 1 +6.6372287e+07 2.76081800e+01 7.54677100e-03 1.62613900e-01 + 1 -1 0 0 501 0 +0.0000000000e+00 +0.0000000000e+00 +3.5918383131e+02 3.5918383131e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -4 -1 0 0 0 501 -0.0000000000e+00 -0.0000000000e+00 -9.9599640123e+00 9.9599640123e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -1.0226056517e+01 -1.7566903028e+01 +5.4113018863e+01 5.7804732716e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 502 0 -1.8623508510e+01 -1.7954792984e+01 +2.3170430073e+02 2.3314393117e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -4 1 1 2 0 503 +2.8849565027e+01 +3.5521696012e+01 +6.3406547700e+01 7.8195131441e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.27608180E+02 +0 + 1 1 0.55259041E-01 0.27608180E+02 + 1 -4 0.15323024E-02 0.27608180E+02 + 0.31455192E+04 + + + + 5 1 +6.6372287e+07 3.28240500e+01 7.54677100e-03 1.56881800e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 
+5.7881981423e+00 5.7881981423e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.2709795693e+03 1.2709795693e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -2.9221921883e+01 +8.0360733545e+00 -5.6807285970e+02 5.6888071959e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +1.6963846300e+01 -2.9307832371e+01 -5.0500567831e+01 6.0803194577e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 503 +1.2258075583e+01 +2.1271759017e+01 -6.4661794361e+02 6.4708385326e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.32824047E+02 +0 + 1 21 0.89049153E-03 0.32824047E+02 + 1 21 0.19553543E+00 0.32824047E+02 + 0.68369125E+05 + + + + 5 1 +6.6372287e+07 2.96747300e+01 7.54677100e-03 1.60171200e-01 + 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +4.0441816137e+01 4.0441816137e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.0683900114e+02 1.0683900114e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -2.6554357024e+01 +3.2973328106e+01 +7.1332905671e+00 4.2933181546e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 +2.0427248287e+01 -1.1559769425e+01 +3.8860694556e+00 2.3790802373e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 501 0 +6.1271087369e+00 -2.1413558681e+01 -7.7416545026e+01 8.0556833358e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.29674731E+02 +0 + 1 21 0.62218180E-02 0.29674731E+02 + 1 2 0.16436769E-01 0.29674731E+02 + 0.63902127E+05 + + + + 5 1 +6.6372287e+07 2.77849600e+01 7.54677100e-03 1.62394800e-01 + -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +5.5901367143e+01 5.5901367143e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 4 -1 0 0 501 0 +0.0000000000e+00 -0.0000000000e+00 -1.6721775392e+02 1.6721775392e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -1.7214919673e+01 +1.5725661972e+01 -4.6993551561e+00 2.3785160136e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 4 
1 1 2 502 0 -7.1368635003e+00 -2.7166369610e+01 -1.4763278291e+02 1.5028102025e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -2 1 1 2 0 503 +2.4351783173e+01 +1.1440707638e+01 +4.1015751290e+01 4.9052940675e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.27784960E+02 +0 + 1 4 0.25725808E-01 0.27784960E+02 + 1 -2 0.86002106E-02 0.27784960E+02 + 0.26447976E+03 + + + + 5 1 +6.6372287e+07 5.39590300e+01 7.54677100e-03 1.42508700e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +5.9800586950e+01 5.9800586950e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -7.5246990846e+01 7.5246990846e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 505 +1.7215966258e+01 -1.3481800279e+01 -1.3120982800e+01 2.5501149436e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 503 -4.4948096317e+01 +2.7284215874e+01 +3.3376063191e+01 6.2279381760e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 502 +2.7732130059e+01 -1.3802415596e+01 -3.5701484287e+01 4.7267046599e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.53959031E+02 +0 + 1 21 0.92000904E-02 0.53959031E+02 + 1 21 0.11576460E-01 0.53959031E+02 + 0.54280239E+06 + + + + 5 1 +6.6372287e+07 3.74932000e+01 7.54677100e-03 1.52751300e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +7.5616997299e+02 7.5616997299e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -9.5383624010e+00 9.5383624010e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +2.4198598086e+01 -2.8579386523e+01 +4.8877048754e+01 6.1573690634e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -4.1183674764e+01 +1.1831705659e+01 +1.5380528884e+02 1.5966261679e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 504 +1.6985076678e+01 +1.6747680864e+01 +5.4394927300e+02 5.4447202797e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.37493203E+02 +0 + 1 21 0.11633379E+00 0.37493203E+02 + 1 21 
0.14674410E-02 0.37493203E+02 + 0.11386933E+06 + + + + 5 1 +6.6372287e+07 4.04284600e+01 7.54677100e-03 1.50508200e-01 + 21 -1 0 0 502 503 +0.0000000000e+00 +0.0000000000e+00 +4.0351583823e+02 4.0351583823e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 4 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.3201441918e+01 1.3201441918e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 4 1 1 2 501 0 +4.2439120380e+01 +1.6559287496e+01 +4.1887418346e+01 6.1885739526e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 4 1 1 2 502 0 -2.1117804890e+01 +8.9613985453e+00 +2.4763202138e+02 2.4869235283e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -4 1 1 2 0 501 -2.1321315491e+01 -2.5520686042e+01 +1.0079495659e+02 1.0613918778e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.40428461E+02 +0 + 1 21 0.62079350E-01 0.40428461E+02 + 1 4 0.20309914E-02 0.40428461E+02 + 0.80181641E+04 + + + + 5 1 +6.6372287e+07 3.42643000e+01 7.54677100e-03 1.55523200e-01 + 1 -1 0 0 504 0 -0.0000000000e+00 +0.0000000000e+00 +3.3344584104e+03 3.3344584104e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -9.8032652357e-01 9.8032652357e-01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -1.6378085835e+01 +1.3734600814e+01 +2.6780940543e+02 2.6866104777e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -2.1482442393e+01 -1.5710524314e+01 +4.2265106582e+02 4.2348818088e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 +3.7860528228e+01 +1.9759235008e+00 +2.6430176126e+03 2.6432895082e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.34264300E+02 +0 + 1 21 0.15082213E-03 0.34264300E+02 + 1 1 0.51298452E+00 0.34264300E+02 + 0.33282672E+05 + + + + 5 1 +6.6372287e+07 1.19571300e+02 7.54677100e-03 1.24380600e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +8.7854154861e+01 8.7854154861e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 503 0 +0.0000000000e+00 -0.0000000000e+00 -1.7934232150e+03 1.7934232150e+03 
0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +1.7605310395e+01 +1.3701422770e+01 +2.6274785914e+01 3.4467960701e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 502 0 +1.1756210603e+02 +1.7577983299e+01 -1.7004020433e+03 1.7045518306e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 502 -1.3516741643e+02 -3.1279406068e+01 -3.1441802775e+01 1.4225757860e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.11957126E+03 +0 + 1 2 0.27591127E+00 0.11957126E+03 + 1 -1 0.13516023E-01 0.11957126E+03 + 0.43015636E+02 + + + + 5 1 +6.6372287e+07 2.58481600e+01 7.54677100e-03 1.64911000e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.4121175553e+01 1.4121175553e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -4.5285316425e+02 4.5285316425e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 505 +1.4043566251e+01 +1.9773590139e+01 -2.4347029469e+02 2.4467529711e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 -2.6394599884e+01 -1.3369434684e+00 -6.6055666935e-01 2.6436691472e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 +1.2351033633e+01 -1.8436646670e+01 -1.9460113733e+02 1.9586235121e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.25848162E+02 +0 + 1 21 0.21724885E-02 0.25848162E+02 + 1 21 0.69669719E-01 0.25848162E+02 + 0.18427689E+06 + + + + 5 1 +6.6372287e+07 5.64794400e+01 7.54677100e-03 1.41322900e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +3.7881487994e+00 3.7881487994e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 -1 0 0 502 0 -0.0000000000e+00 -0.0000000000e+00 -8.3293306677e+02 8.3293306677e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +2.2042587179e+01 +4.9935512809e+01 -3.4469253261e+02 3.4898764036e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 -1.0400442402e+01 -2.1391345840e+01 -1.5133248122e+02 1.5319033504e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 -1.1642144777e+01 
-2.8544166969e+01 -3.3311990414e+02 3.3454324016e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.56479436E+02 +0 + 1 21 0.58279180E-03 0.56479436E+02 + 1 1 0.12814362E+00 0.56479436E+02 + 0.18915759E+06 + + + + 5 1 +6.6372287e+07 1.20437600e+02 7.54677100e-03 1.24237700e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +7.6326880933e+01 7.6326880933e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -5.6424004213e+02 5.6424004213e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 -1.3358633684e+02 -3.2803021929e+01 -2.5791576009e+01 1.3995196687e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 +9.1619991808e+01 +2.3833933827e+01 -1.0682884494e+02 1.4273990827e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 1 1 2 501 0 +4.1966345031e+01 +8.9690881026e+00 -3.5529274025e+02 3.5787504793e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.12043761E+03 +0 + 1 21 0.11742597E-01 0.12043761E+03 + 1 1 0.86806160E-01 0.12043761E+03 + 0.26061044E+04 + + + + 5 1 +6.6372287e+07 5.37227900e+01 7.54677100e-03 1.42623700e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.3304377309e+02 1.3304377309e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -7.0963777946e+01 7.0963777946e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 -1.5841020033e+01 +3.7513476754e+01 -1.3490766536e+01 4.2897548130e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 -5.0497052458e+01 -2.8085691040e+00 +6.9558481825e+01 8.6001295113e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 +6.6338072490e+01 -3.4704907650e+01 +6.0122798510e+00 7.5108707789e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.53722789E+02 +0 + 1 21 0.20468273E-01 0.53722789E+02 + 1 21 0.10917504E-01 0.53722789E+02 + 0.14842924E+06 + + + + 5 1 +6.6372287e+07 4.63666500e+01 7.54677100e-03 1.46598600e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 
+0.0000000000e+00 +7.9404929403e+01 7.9404929403e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -4.0571876305e+01 4.0571876305e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +3.8096004013e+01 -2.0103472745e+01 +5.5898111257e+01 7.0569497520e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 -2.4864947573e+01 +3.4826673187e+00 -8.6196401551e+00 2.6546050287e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 502 -1.3231056440e+01 +1.6620805427e+01 -8.4454180038e+00 2.2861257901e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.46366646E+02 +0 + 1 21 0.12216143E-01 0.46366646E+02 + 1 21 0.62418271E-02 0.46366646E+02 + 0.93143569E+06 + + + + 5 1 +6.6372287e+07 3.23809700e+01 7.54677100e-03 1.57316900e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.7524908213e+01 2.7524908213e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -1.6414512109e+02 1.6414512109e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 505 +1.0735752809e+01 +2.3888282841e+01 -4.2028929504e+01 4.9521079963e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 +8.4348718406e+00 -4.8157198822e+01 -9.9473441559e+01 1.1083874971e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 -1.9170624649e+01 +2.4268915981e+01 +4.8821581875e+00 3.1310199631e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.32380969E+02 +0 + 1 21 0.42346013E-02 0.32380969E+02 + 1 21 0.25253095E-01 0.32380969E+02 + 0.46542943E+06 + + + + 5 1 +6.6372287e+07 3.04619800e+01 7.54677100e-03 1.59303800e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +2.2711935090e+03 2.2711935090e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -2.7585117854e+00 2.7585117854e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +2.0648195639e+01 -1.1330743851e+01 +3.5163773314e+02 3.5242563628e+02 
0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -2.3394837542e+01 -2.0181653530e+01 +1.0496115767e+02 1.0941417724e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 +2.7466419032e+00 +3.1512397381e+01 +1.8118361064e+03 1.8121122073e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.30461979E+02 +0 + 1 21 0.42438824E-03 0.30461979E+02 + 1 2 0.34941289E+00 0.30461979E+02 + 0.91450792E+05 + + + + 5 1 +6.6372287e+07 5.05952900e+01 7.54677100e-03 1.44215700e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.7310225510e+01 1.7310225510e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 502 0 -0.0000000000e+00 -0.0000000000e+00 -1.2474989233e+03 1.2474989233e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 -2.0960667079e+01 -3.2621697190e+00 -1.3563546253e-01 2.1213432364e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -3.0621090753e+01 +1.1876026904e+01 -3.7308678078e+01 4.9705418959e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 +5.1581757832e+01 -8.6138571850e+00 -1.1927443843e+03 1.1938902975e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.50595294E+02 +0 + 1 21 0.26631115E-02 0.50595294E+02 + 1 2 0.19192292E+00 0.50595294E+02 + 0.17875597E+05 + + + + 5 1 +6.6372287e+07 5.35686700e+01 7.54677100e-03 1.42699200e-01 + 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +2.0775922111e+01 2.0775922111e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.9382658620e+02 1.9382658620e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 -2.0032585171e+01 +8.2484330843e+00 -1.7029580071e+01 2.7556264521e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 -3.0721524725e+01 +4.2318877201e+00 -1.1017363691e+01 3.2910534147e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 504 +5.0754109896e+01 -1.2480320804e+01 -1.4500372033e+02 1.5413570964e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.53568673E+02 +0 + 1 21 
0.31962958E-02 0.53568673E+02 + 1 21 0.29819474E-01 0.53568673E+02 + 0.56798716E+06 + + + + 5 1 +6.6372287e+07 3.85025300e+01 7.54677100e-03 1.51953000e-01 + 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +2.2289530809e+02 2.2289530809e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 3 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.1799706113e+01 1.1799706113e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +2.1037505294e+01 +3.1358606708e+01 +8.1763268338e+01 9.0062039134e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 +1.7522488983e+01 -1.3904294537e+01 +4.7427977092e+01 5.2438345109e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 3 1 1 2 501 0 -3.8559994278e+01 -1.7454312171e+01 +8.1904356543e+01 9.2194629956e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.38502528E+02 +0 + 1 21 0.34291583E-01 0.38502528E+02 + 1 3 0.18153396E-02 0.38502528E+02 + 0.34255031E+05 + + + + 5 1 +6.6372287e+07 3.75647200e+01 7.54677100e-03 1.52693700e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +5.1007388093e+00 5.1007388093e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.1898429651e+03 1.1898429651e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -2.3577055731e+01 +8.6164758199e+00 -5.6840562318e+02 5.6895964152e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -1.7661209766e+01 -3.2338872055e+01 -8.6569325177e+01 9.4084903347e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 504 +4.1238265497e+01 +2.3722396235e+01 -5.2976727797e+02 5.3189915908e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.37564724E+02 +0 + 1 21 0.78472893E-03 0.37564724E+02 + 1 21 0.18305279E+00 0.37564724E+02 + 0.98499507E+05 + + + + 5 1 +6.6372287e+07 1.43855200e+02 7.54677100e-03 1.20823400e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.7613958321e+02 1.7613958321e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 
-0.0000000000e+00 -2.5721546644e+02 2.5721546644e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +1.8697441469e+01 +4.6743956885e+01 +6.3444037986e+01 8.0992208136e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 1 1 2 501 0 +1.5605198954e+01 -1.3052773015e+02 -2.0496954320e+02 2.4350261644e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -1 1 1 2 0 504 -3.4302640423e+01 +8.3783773266e+01 +6.0449621991e+01 1.0886022507e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.14385516E+03 +0 + 1 21 0.27098398E-01 0.14385516E+03 + 1 21 0.39571610E-01 0.14385516E+03 + 0.81522626E+04 + + + + 5 1 +6.6372287e+07 2.99475600e+01 7.54677100e-03 1.59866900e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.1735167452e+02 1.1735167452e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -6.7731005686e+01 6.7731005686e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -3.0037339775e+01 +1.9906567203e+01 +7.7442255748e+01 8.5415549954e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 505 +3.9581584148e+00 -2.3241579138e+01 -5.6222098856e+01 6.0965255831e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 +2.6079181360e+01 +3.3350119355e+00 +2.8400511938e+01 3.8701874417e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.29947557E+02 +0 + 1 21 0.18054104E-01 0.29947557E+02 + 1 21 0.10420155E-01 0.29947557E+02 + 0.19218372E+06 + + + + 5 1 +6.6372287e+07 5.51267100e+01 7.54677100e-03 1.41950100e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +3.3605621517e+02 3.3605621517e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.6748192173e+02 1.6748192173e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -3.6502084271e+01 +1.2779288039e+01 +2.6845966100e+00 3.8767504664e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 505 +5.6330187720e+01 -2.0382049136e+01 -1.4305805077e+02 1.5509391950e+02 0.0000000000e+00 
0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 -1.9828103448e+01 +7.6027610964e+00 +3.0894774760e+02 3.0967671273e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.55126711E+02 +0 + 1 21 0.51700956E-01 0.55126711E+02 + 1 21 0.25766450E-01 0.55126711E+02 + 0.55367291E+04 + + + + 5 1 +6.6372287e+07 2.38060800e+01 7.54677100e-03 1.67876700e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +5.6292151055e+01 5.6292151055e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -1.0100350383e+02 1.0100350383e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 -1.7078783867e+01 +1.3980353782e+01 +4.3115131723e+01 4.8436037552e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 +1.9550036137e+01 +1.1807248332e+01 -6.8557268747e+00 2.3845670826e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 -2.4712522707e+00 -2.5787602114e+01 -8.0970757619e+01 8.5013946502e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.23806083E+02 +0 + 1 21 0.86603311E-02 0.23806083E+02 + 1 21 0.15539000E-01 0.23806083E+02 + 0.32926995E+06 + + + + 5 1 +6.6372287e+07 4.37816800e+01 7.54677100e-03 1.48209400e-01 + -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +7.6069687389e+01 7.6069687389e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -6.3690305890e+01 6.3690305890e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -3.1851837126e+01 -2.0905814741e+01 -1.0380698209e+01 3.9488625117e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 +5.2242018124e+01 +1.1087919075e+01 -1.5683784728e+01 5.5661041227e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -2 1 1 2 0 503 -2.0390180998e+01 +9.8178956656e+00 +3.8443864437e+01 4.4610326936e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.43781676E+02 +0 + 1 21 0.97985087E-02 0.43781676E+02 + 1 -2 0.11703029E-01 0.43781676E+02 + 0.29443686E+05 + + + + 5 1 +6.6372287e+07 6.86898000e+01 7.54677100e-03 
1.36460600e-01 + 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +2.1667938332e+02 2.1667938332e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.5467804492e+02 1.5467804492e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 505 -1.1038053112e+01 +2.3433804553e+01 -2.7774504919e+01 3.7979006514e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 -1.5088204651e+01 +3.2631253632e+01 -1.1446451215e+02 1.1997740276e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 +2.6126257764e+01 -5.6065058184e+01 +2.0424035546e+02 2.1340101896e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.68689805E+02 +0 + 1 21 0.33335290E-01 0.68689805E+02 + 1 21 0.23796622E-01 0.68689805E+02 + 0.15057095E+05 + + + + 5 1 +6.6372287e+07 5.62723200e+01 7.54677100e-03 1.41417600e-01 + -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.2380787192e+02 1.2380787192e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -2.0327409785e+03 2.0327409785e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +2.3729827285e+01 -2.7343988978e+01 -1.7426108947e+03 1.7429869560e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -5.3672579255e+00 -2.5936900616e+01 -2.8286809267e+02 2.8410541727e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -2 1 1 2 0 502 -1.8362569359e+01 +5.3280889594e+01 +1.1654588079e+02 1.2945647716e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.56272325E+02 +0 + 1 21 0.31272939E+00 0.56272325E+02 + 1 -2 0.19047365E-01 0.56272325E+02 + 0.69988168E+01 + + + + 5 1 +6.6372287e+07 3.06264600e+01 7.54677100e-03 1.59126500e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +7.3143843443e+00 7.3143843443e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -2.8991776011e+02 2.8991776011e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -2.7176763448e+01 -5.9122717629e+00 
-8.2589250105e+01 8.7146518358e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +3.7114675978e+00 -2.1312114619e+01 -3.4556109190e+01 4.0768933069e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 504 +2.3465295850e+01 +2.7224386382e+01 -1.6545801647e+02 1.6931669302e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.30626458E+02 +0 + 1 21 0.11252898E-02 0.30626458E+02 + 1 21 0.44602736E-01 0.30626458E+02 + 0.12571056E+07 + + + + 5 1 +6.6372287e+07 2.39754200e+01 7.54677100e-03 1.67617000e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +3.1086457142e+00 3.1086457142e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -1.1096565876e+03 1.1096565876e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +1.8074701851e+01 -1.2471200900e+01 -4.7301063874e+01 5.2149940964e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -1.5797907864e+01 -1.5878819740e+01 -2.9957929976e+02 3.0041549170e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -2.2767939866e+00 +2.8350020640e+01 -7.5966757822e+02 7.6019980061e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.23975415E+02 +0 + 1 21 0.47825339E-03 0.23975415E+02 + 1 2 0.17071633E+00 0.23975415E+02 + 0.26724670E+06 + + + + 5 1 +6.6372287e+07 2.53344800e+01 7.54677100e-03 1.65624400e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +5.1570098369e+02 5.1570098369e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -1 -1 0 0 0 501 -0.0000000000e+00 -0.0000000000e+00 -1.8445706221e+02 1.8445706221e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 +2.4207533199e+01 -7.3828354628e+00 +5.0432335498e+02 5.0495797578e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 +8.1618412819e+00 -2.1011090789e+01 -6.9595529015e+00 2.3590611815e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 504 -3.2369374481e+01 +2.8393926251e+01 -1.6611988060e+02 1.7160945831e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 
0.25334477E+02 +0 + 1 21 0.79338612E-01 0.25334477E+02 + 1 -1 0.28378010E-01 0.25334477E+02 + 0.16118883E+03 + + + + 5 1 +6.6372287e+07 4.96978700e+01 7.54677100e-03 1.44697800e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +4.1669869678e+01 4.1669869678e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -7.2169785693e+02 7.2169785693e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -1.0137451528e+01 -1.9749989160e+01 -4.1326305480e+02 4.1385889197e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 +7.1118147579e-01 -2.6001039854e+01 -2.9419229654e+02 2.9533991805e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -1 1 1 2 0 504 +9.4262700527e+00 +4.5751029015e+01 +2.7427364092e+01 5.4168916585e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.49697867E+02 +0 + 1 21 0.11103045E+00 0.49697867E+02 + 1 -1 0.64107485E-02 0.49697867E+02 + 0.70621395E+03 + + + + 5 1 +6.6372287e+07 4.01879700e+01 7.54677100e-03 1.50683300e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +7.1802510669e+01 7.1802510669e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -7.1337629671e+01 7.1337629671e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +5.8213069507e+00 +5.5036070403e+01 -5.3681175091e+00 5.5602817785e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 505 -7.7116246928e+00 -3.4472047188e+01 -2.9891185989e+01 4.6273904012e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 +1.8903177421e+00 -2.0564023215e+01 +3.5724184496e+01 4.1263418543e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.40187974E+02 +0 + 1 21 0.11046540E-01 0.40187974E+02 + 1 21 0.10975020E-01 0.40187974E+02 + 0.42308800E+06 + + + + 5 1 +6.6372287e+07 2.09126700e+01 7.54677100e-03 1.72776500e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.1568669659e+02 2.1568669659e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 
-0.0000000000e+00 -0.0000000000e+00 -4.4598327407e+01 4.4598327407e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +1.0489102981e+01 +2.1031084145e+01 +3.3096570840e+00 2.3733554552e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 505 -2.0964401963e+01 -3.6307327249e+00 +1.9854727555e+02 1.9968402289e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 502 +1.0475298982e+01 -1.7400351420e+01 -3.0768563453e+01 3.6867446552e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.20912670E+02 +0 + 1 21 0.33182568E-01 0.20912670E+02 + 1 21 0.68612813E-02 0.20912670E+02 + 0.12384532E+06 + + + + 5 1 +6.6372287e+07 2.76487000e+01 7.54677100e-03 1.62563500e-01 + 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +8.8315219304e+00 8.8315219304e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -8.5548998670e+02 8.5548998670e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -2.6108367324e+01 +2.7834397992e+01 -5.0511958607e+01 6.3307649761e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 3 1 1 2 501 0 -1.5502416378e+00 -2.7227504772e+01 -8.3708945045e+01 8.8039353392e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -3 1 1 2 0 503 +2.7658608962e+01 -6.0689322000e-01 -7.1243756111e+02 7.1297450547e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.27648700E+02 +0 + 1 21 0.13586955E-02 0.27648700E+02 + 1 21 0.13161386E+00 0.27648700E+02 + 0.94752714E+05 + + + + 5 1 +6.6372287e+07 3.42335000e+01 7.54677100e-03 1.55551500e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +5.5344269194e+01 5.5344269194e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -5.5616791251e+02 5.5616791251e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -5.3992539344e+01 -1.9093964990e+01 -4.2937502340e+02 4.3317742845e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 +2.0534474104e+01 +1.3341922147e+01 -1.1811465484e+02 1.2062646145e+02 
0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 503 +3.3458065241e+01 +5.7520428423e+00 +4.6666034926e+01 5.7708291798e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.34233500E+02 +0 + 1 21 0.85564297E-01 0.34233500E+02 + 1 -1 0.85145026E-02 0.34233500E+02 + 0.83709286E+03 + + + + 5 1 +6.6372287e+07 3.19061800e+01 7.54677100e-03 1.57792600e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +3.0941997221e+01 3.0941997221e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.4106991043e+02 1.4106991043e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -7.0637436764e+00 -2.8412231374e+01 -9.0188811038e+01 9.4821796033e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +1.2887679806e+01 +4.9262952225e+01 -2.1945566891e+01 5.5448522603e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 503 -5.8239361293e+00 -2.0850720851e+01 +2.0064647242e+00 2.1741589011e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.31906182E+02 +0 + 1 21 0.47603074E-02 0.31906182E+02 + 1 21 0.21703063E-01 0.31906182E+02 + 0.50542998E+06 + + + + 5 1 +6.6372287e+07 3.48184100e+01 7.54677100e-03 1.55021900e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +2.1589074046e+03 2.1589074046e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.5961454167e+01 1.5961454167e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 +2.0024453480e+01 +1.8334123955e+01 +1.9389015022e+03 1.9390915796e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 -3.0260374134e+01 +1.8529083324e+01 +8.4480836704e+00 3.6474474489e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 +1.0235920654e+01 -3.6863207279e+01 +1.9559636455e+02 1.9930280466e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.34818413E+02 +0 + 1 21 0.33213937E+00 0.34818413E+02 + 1 21 0.24556100E-02 0.34818413E+02 + 0.27565033E+04 + + + + 5 1 +6.6372287e+07 3.59920600e+01 
7.54677100e-03 1.53996300e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +9.7186356638e+01 9.7186356638e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -2.5874356624e+02 2.5874356624e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -1.8438172649e+01 -2.1235714101e+01 -8.7075130972e+01 9.1504099350e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 +5.0238810426e+01 +4.6817397829e+00 -1.6209235767e+02 1.6976392189e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 504 -3.1800637777e+01 +1.6553974318e+01 +8.7610279039e+01 9.4661901640e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.35992060E+02 +0 + 1 21 0.14951747E-01 0.35992060E+02 + 1 21 0.39806702E-01 0.35992060E+02 + 0.24770639E+05 + + + + 5 1 +6.6372287e+07 4.43353000e+01 7.54677100e-03 1.47853400e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +4.1509889175e+01 4.1509889175e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.5021838758e+02 1.5021838758e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +2.1728377512e+01 -4.5038835640e+00 +4.7828765006e+00 2.2699851631e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 +3.7288465851e+01 +1.8488032542e+01 -9.7938315943e+01 1.0641499313e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 502 -5.9016843363e+01 -1.3984148978e+01 -1.5553058964e+01 6.2613431996e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.44335296E+02 +0 + 1 21 0.63861368E-02 0.44335296E+02 + 1 21 0.23110521E-01 0.44335296E+02 + 0.28976826E+06 + + + + 5 1 +6.6372287e+07 8.95697900e+01 7.54677100e-03 1.30389700e-01 + -1 -1 0 0 0 502 -0.0000000000e+00 +0.0000000000e+00 +1.0959072147e+02 1.0959072147e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -4 -1 0 0 0 501 +0.0000000000e+00 -0.0000000000e+00 -2.3941333748e+02 2.3941333748e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +4.4433031586e+01 
-8.6837769029e+00 +3.1109742872e+01 5.4931943155e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -4 1 1 2 0 502 -7.9188610214e+01 +3.3845172129e+01 -2.1114031298e+02 2.2802754971e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 503 +3.4755578628e+01 -2.5161395226e+01 +5.0207954089e+01 6.6044566085e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.89569794E+02 +0 + 1 -4 0.36832821E-01 0.89569794E+02 + 1 -1 0.16860111E-01 0.89569794E+02 + 0.65999041E+02 + + + + 5 1 +6.6372287e+07 3.30531100e+01 7.54677100e-03 1.56660200e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +7.4577216351e+00 7.4577216351e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -2.9293771215e+03 2.9293771215e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -8.0432547113e+00 +3.1734139686e+01 -3.2819056421e+01 4.6355582537e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 -6.2607331878e+00 +2.0255158476e+01 -2.7252772679e+02 2.7335111139e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 +1.4303987899e+01 -5.1989298162e+01 -2.6165726167e+03 2.6171281493e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.33053112E+02 +0 + 1 21 0.11473423E-02 0.33053112E+02 + 1 2 0.45067321E+00 0.33053112E+02 + 0.95448503E+04 + + + + 5 1 +6.6372287e+07 2.64822300e+01 7.54677100e-03 1.64058200e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +2.0433495267e+02 2.0433495267e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -1.1621962120e+01 1.1621962120e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -6.1453446806e-01 +2.8655724578e+01 +2.6926488695e+01 3.9326377880e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +1.9294771642e+01 -8.5428375362e+00 +2.2933975834e+01 3.1164651987e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -1.8680237174e+01 -2.0112887042e+01 +1.4285252602e+02 1.4546588493e+02 0.0000000000e+00 0.0000e+00 
-1.0000e+00 + + 3 0.26482232E+02 +0 + 1 21 0.17879943E-02 0.26482232E+02 + 1 2 0.31436144E-01 0.26482232E+02 + 0.22563568E+06 + + + + 5 1 +6.6372287e+07 2.94114000e+01 7.54677100e-03 1.60468700e-01 + 1 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +7.1456949129e+01 7.1456949129e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 502 0 +0.0000000000e+00 -0.0000000000e+00 -3.2341513368e+01 3.2341513368e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -1.2633702387e+01 +1.6779116547e+01 +3.5382422326e+01 4.1146871057e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 +3.4850371802e+01 -4.6645490224e-01 +1.2205098572e-01 3.4853706995e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 502 0 -2.2216669414e+01 -1.6312661644e+01 +3.6109624495e+00 2.7797884444e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.29411403E+02 +0 + 1 2 0.49756174E-02 0.29411403E+02 + 1 1 0.10993377E-01 0.29411403E+02 + 0.84192782E+04 + + + + 5 1 +6.6372287e+07 4.12907800e+01 7.54677100e-03 1.49892200e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.6408792035e+01 1.6408792035e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -2.8426721197e+02 2.8426721197e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -9.0227533210e+00 +4.0120314210e+01 -1.6777925940e+02 1.7274527367e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 -2.8940986847e+01 -8.3287328478e+00 -3.5522385380e+01 4.6570252025e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 +3.7963740168e+01 -3.1791581362e+01 -6.4556775153e+01 8.1360478312e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.41290780E+02 +0 + 1 21 0.25244298E-02 0.41290780E+02 + 1 21 0.43733413E-01 0.41290780E+02 + 0.38789728E+06 + + + + 5 1 +6.6372287e+07 3.01075800e+01 7.54677100e-03 1.59690300e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.0837614737e+02 1.0837614737e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 -1 0 0 503 0 
+0.0000000000e+00 -0.0000000000e+00 -5.0187655412e+02 5.0187655412e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -2.4244911852e+01 +2.4839805338e+00 -4.4874897660e+01 5.1066058689e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 1 1 2 502 0 +9.4171551279e+00 +2.3796264682e+01 -4.5137403450e+02 4.5209895382e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -1 1 1 2 0 502 +1.4827756724e+01 -2.6280245215e+01 +1.0274852541e+02 1.0708768898e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.30107583E+02 +0 + 1 1 0.77211778E-01 0.30107583E+02 + 1 -1 0.16673253E-01 0.30107583E+02 + 0.11578566E+03 + + + + 5 1 +6.6372287e+07 2.75831300e+01 7.54677100e-03 1.62645100e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +1.5119270639e+01 1.5119270639e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 503 -0.0000000000e+00 -0.0000000000e+00 -4.4797372094e+02 4.4797372094e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +2.3933439080e+01 -1.5349180970e+01 -7.6519214979e+00 2.9444163518e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 -9.1468995899e+00 +2.8848403826e+01 -5.3965016364e+01 6.1871796213e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 503 -1.4786539490e+01 -1.3499222856e+01 -3.7123751244e+02 3.7177703185e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.27583132E+02 +0 + 1 21 0.23260418E-02 0.27583132E+02 + 1 21 0.68919030E-01 0.27583132E+02 + 0.17059051E+06 + + + + 5 1 +6.6372287e+07 5.38232700e+01 7.54677100e-03 1.42574700e-01 + 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +8.6931327192e+02 8.6931327192e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.3878826862e+01 1.3878826862e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -1.1384767832e+01 -5.5414417532e+01 +5.7810173998e+02 5.8086314427e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 503 +4.8586431572e+01 +3.3136350907e+01 +2.3303180192e+02 2.4033826119e+02 
0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 504 -3.7201663740e+01 +2.2278066625e+01 +4.4300903156e+01 6.1990693318e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.53823273E+02 +0 + 1 21 0.13374046E+00 0.53823273E+02 + 1 21 0.21352048E-02 0.53823273E+02 + 0.45957317E+05 + + + + 5 1 +6.6372287e+07 4.09907500e+01 7.54677100e-03 1.50104500e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +7.0992522075e+01 7.0992522075e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.3949703176e+02 1.3949703176e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +1.1659851347e+01 -5.1077643128e+01 +4.3516059870e+01 6.8106719401e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +3.0482205563e+00 +2.8158917183e+01 -1.2900096167e+00 2.8352784524e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 503 -1.4708071904e+01 +2.2918725946e+01 -1.1073055994e+02 1.1403004991e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.40990751E+02 +0 + 1 21 0.10921927E-01 0.40990751E+02 + 1 21 0.21461082E-01 0.40990751E+02 + 0.13404950E+06 + + + + 5 1 +6.6372287e+07 3.36149800e+01 7.54677100e-03 1.56125500e-01 + 21 -1 0 0 502 503 +0.0000000000e+00 +0.0000000000e+00 +7.9720643936e+01 7.9720643936e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.1015809106e+02 1.1015809106e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 +2.0832938167e+01 +2.8798086026e+01 -8.7908167480e+01 9.4821869741e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 4 1 1 2 502 0 -3.2596791056e+01 +4.0076403627e+00 +4.9338106023e+01 5.9269390704e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -4 1 1 2 0 501 +1.1763852889e+01 -3.2805726389e+01 +8.1326143350e+00 3.5787474550e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.33614975E+02 +0 + 1 21 0.12264714E-01 0.33614975E+02 + 1 1 0.16947399E-01 0.33614975E+02 + 0.16891392E+05 + + + + 5 1 +6.6372287e+07 5.25551000e+01 7.54677100e-03 
1.43202800e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +4.5821819927e+02 4.5821819927e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -1.2383942127e+01 1.2383942127e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -5.0926951541e+01 -5.0815690566e+00 +5.0883321493e+01 7.2169863125e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 +2.4440122995e+01 +3.5331599133e+00 +1.4625618020e+02 1.4832623867e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 504 +2.6486828546e+01 +1.5484091433e+00 +2.4869475544e+02 2.5010603960e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.52555102E+02 +0 + 1 21 0.19052222E-02 0.52555102E+02 + 1 -1 0.70495095E-01 0.52555102E+02 + 0.23587364E+05 + + + + 5 1 +6.6372287e+07 4.46278200e+01 7.54677100e-03 1.47667900e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +5.5593250322e+02 5.5593250322e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.8836808285e+02 1.8836808285e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +4.0713374762e+01 +1.7975906273e+01 +5.4492854609e+02 5.4674293085e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 -4.8243013951e+01 -3.8385710795e+01 -1.0265016479e+02 1.1974100183e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 +7.5296391891e+00 +2.0409804522e+01 -7.4713960932e+01 7.7816653392e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.44627825E+02 +0 + 1 21 0.85528077E-01 0.44627825E+02 + 1 21 0.28979705E-01 0.44627825E+02 + 0.15745256E+04 + + + + 5 1 +6.6372287e+07 3.37382900e+01 7.54677100e-03 1.56009900e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +1.5881756914e+03 1.5881756914e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -1.3626711614e+01 1.3626711614e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -2.5839259073e+01 -9.7303516134e+00 
+1.6154599506e+02 1.6388854618e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -1.7069497306e+01 +2.9008026152e+01 +1.1420766537e+01 3.5542470761e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 +4.2908756379e+01 -1.9277674538e+01 +1.4015822182e+03 1.4023713861e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.33738290E+02 +0 + 1 21 0.20964184E-02 0.33738290E+02 + 1 2 0.24433458E+00 0.33738290E+02 + 0.17276941E+05 + + + + 5 1 +6.6372287e+07 3.21182600e+01 7.54677100e-03 1.57578900e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +4.7751679366e+02 4.7751679366e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -2.2525410541e+01 2.2525410541e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -1.0544572028e+01 +2.8771952473e+01 +1.5170782840e+02 1.5477169782e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +2.6106310540e+01 +5.1888225486e+00 -1.0981232855e+01 2.8793242340e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 504 -1.5561738512e+01 -3.3960775022e+01 +3.1426478757e+02 3.1647726404e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.32118265E+02 +0 + 1 21 0.73464115E-01 0.32118265E+02 + 1 21 0.34654481E-02 0.32118265E+02 + 0.80826114E+05 + + + + 5 1 +6.6372287e+07 4.09504400e+01 7.54677100e-03 1.50133100e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +1.7536299147e+02 1.7536299147e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.6964015600e+01 3.6964015600e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +4.0714726769e+00 +5.7850454719e+01 +1.0890596398e+00 5.8003776186e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +4.7732900088e+00 -1.9654732976e+01 +1.3161726099e+01 2.4131387440e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 504 -8.8447626857e+00 -3.8195721743e+01 +1.2414819013e+02 1.3019184344e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 
+ + 3 0.40950439E+02 +0 + 1 21 0.26978921E-01 0.40950439E+02 + 1 21 0.56867717E-02 0.40950439E+02 + 0.26246574E+06 + + + + 5 1 +6.6372287e+07 2.73967700e+01 7.54677100e-03 1.62878500e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +9.4621731867e+02 9.4621731867e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -5.2344682715e+02 5.2344682715e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +3.5097651064e+01 +5.1743549524e+00 +5.7624905467e+01 6.7670146960e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 -1.6769759782e+01 -2.1740470328e+01 +8.8306196865e+02 8.8348871717e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 502 -1.8327891283e+01 +1.6566115375e+01 -5.1791638260e+02 5.1850528169e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.27396771E+02 +0 + 1 21 0.14557190E+00 0.27396771E+02 + 1 21 0.80530281E-01 0.27396771E+02 + 0.67303146E+02 + + + + 5 1 +6.6372287e+07 8.17190300e+01 7.54677100e-03 1.32424300e-01 + 1 -1 0 0 502 0 +0.0000000000e+00 +0.0000000000e+00 +3.6186655864e+02 3.6186655864e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -8.5536214190e+01 8.5536214190e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +6.9748665717e+01 -2.1225395602e+01 +1.1519982450e+02 1.3633192345e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 +1.4237054539e+01 +2.7062837406e+01 +2.1101094416e+02 2.1321517170e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 502 0 -8.3985720256e+01 -5.8374418043e+00 -4.9880424207e+01 9.7855677673e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.81719032E+02 +0 + 1 1 0.55671777E-01 0.81719032E+02 + 1 1 0.13159418E-01 0.81719032E+02 + 0.34676915E+03 + + + + 5 1 +6.6372287e+07 3.21808800e+01 7.54677100e-03 1.57516200e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.6785736656e+03 1.6785736656e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 
+0.0000000000e+00 -0.0000000000e+00 -2.6055706089e+00 2.6055706089e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 -1.3779710329e+01 -3.5600343603e+01 +9.7923046116e+02 9.7997426545e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 +2.3134215327e+01 +6.9265799703e+00 +7.6205484473e+01 7.9940260774e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 504 -9.3545049987e+00 +2.8673763633e+01 +6.2053214934e+02 6.2126470997e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.32180882E+02 +0 + 1 21 0.40085836E-03 0.32180882E+02 + 1 -1 0.25824124E+00 0.32180882E+02 + 0.94656396E+04 + + + + 5 1 +6.6372287e+07 2.72410900e+01 7.54677100e-03 1.63075300e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +4.0924620761e+01 4.0924620761e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.0688357873e+02 1.0688357873e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +1.4260677542e+01 +2.3044763190e+01 +2.6464656953e+01 3.7878834487e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 -3.4418262802e+01 -1.1596463544e+01 -5.7672141314e+01 6.8155488882e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 +2.0157585259e+01 -1.1448299646e+01 -3.4751473609e+01 4.1773876122e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.27241089E+02 +0 + 1 21 0.62960956E-02 0.27241089E+02 + 1 21 0.16443627E-01 0.27241089E+02 + 0.51232088E+06 + + + + 5 1 +6.6372287e+07 8.96432400e+01 7.54677100e-03 1.30371800e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.9106265508e+03 1.9106265508e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -8.3144018617e+00 8.3144018617e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 +2.0112949282e+01 -5.3085600688e+01 +9.4969449400e+02 9.5138963820e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 -4.7004645833e+00 -3.1846193563e+01 +6.9718679537e+02 
6.9792958244e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 -1.5412484699e+01 +8.4931794252e+01 +2.5543085956e+02 2.6962173201e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.89643239E+02 +0 + 1 21 0.29394220E+00 0.89643239E+02 + 1 21 0.12791403E-02 0.89643239E+02 + 0.10444343E+05 + + + + 5 1 +6.6372287e+07 3.51788400e+01 7.54677100e-03 1.54701800e-01 + 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +2.4477402762e+01 2.4477402762e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -4 -1 0 0 0 501 -0.0000000000e+00 -0.0000000000e+00 -1.3548028651e+02 1.3548028651e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +5.2535145793e+00 -1.9356004202e+01 -7.2383362573e+01 7.5110621695e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 -1.7320229400e+01 -2.4467895105e+01 -1.3062058210e+01 3.2699932753e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -4 1 1 2 0 502 +1.2066714821e+01 +4.3823899307e+01 -2.5557462961e+01 5.2147134821e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.35178836E+02 +0 + 1 21 0.37657543E-02 0.35178836E+02 + 1 -4 0.20843121E-01 0.35178836E+02 + 0.26617371E+05 + + + + 5 1 +6.6372287e+07 5.53124500e+01 7.54677100e-03 1.41862700e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.4660412160e+02 2.4660412160e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -7.8448765078e+02 7.8448765078e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 505 -4.2504266063e+01 -2.5589506438e+01 -2.1846702105e+02 2.2402962920e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 +6.8994906150e+01 -2.2979517961e+01 -5.5770982325e+02 5.6243097556e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 -2.6490640086e+01 +4.8569024399e+01 +2.3829331512e+02 2.4463116762e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.55312446E+02 +0 + 1 21 0.37939096E-01 0.55312446E+02 + 1 21 0.12069041E+00 0.55312446E+02 + 0.42114463E+03 + + + + 5 1 +6.6372287e+07 
3.03757500e+01 7.54677100e-03 1.59397200e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +1.2971462251e+03 1.2971462251e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -5.6448232432e+00 5.6448232432e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 -2.3055791365e+00 +2.0675048888e+01 +2.1686626098e+02 2.1786176464e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -2.9371916466e+01 -8.2619650311e+00 +4.2957332697e+01 5.2690625120e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 501 0 +3.1677495603e+01 -1.2413083857e+01 +1.0316778082e+03 1.0322386586e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.30375748E+02 +0 + 1 21 0.86843538E-03 0.30375748E+02 + 1 2 0.19956072E+00 0.30375748E+02 + 0.91540288E+05 + + + + 5 1 +6.6372287e+07 4.60134200e+01 7.54677100e-03 1.46811200e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.7829784629e+01 1.7829784629e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -2.8514522110e+02 2.8514522110e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -3.0860485485e+01 +3.4588417908e+01 -6.7479109992e+01 8.1866711813e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 +2.6117598747e+01 +1.5550519447e+01 -1.3824680405e+02 1.4154902490e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -1 1 1 2 0 504 +4.7428867379e+00 -5.0138937355e+01 -6.1589522426e+01 7.9559269015e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.46013420E+02 +0 + 1 21 0.43868495E-01 0.46013420E+02 + 1 -1 0.27430438E-02 0.46013420E+02 + 0.16895399E+05 + + +
From 43ef2e8ef537b3b24d2a9773159af768a7b89dab Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Wed, 13 Mar 2024 15:35:04 +0100 Subject: [PATCH 07/76] added proper makefiles for rwgt_runners and rwgt_driver --- MG5aMC/mg5amcnlo | 2 +- .../template_files/gpu/cudacpp_rex_driver.mk | 1049 +++++++++++++++++ .../{cudacpp_rex.mk => cudacpp_rex_runner.mk} | 15 +- .../CUDACPP_SA_OUTPUT/model_handling.py | 19 +- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 24 +- tools/REX/rwgt_driver.cc | 2 +- tools/REX/rwgt_instance.h | 2 + tools/REX/rwgt_runner.cc | 4 +- tools/REX/teawREX.hpp | 4 - 9 files changed, 1100 insertions(+), 21 deletions(-) create mode 100644 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk rename epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/{cudacpp_rex.mk => cudacpp_rex_runner.mk} (98%) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index a458c4c92b..9d809a7a3d 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit a458c4c92b1887bb006f5b99b0c94059ec2c29fa +Subproject commit 9d809a7a3d149298ec5d77bacd9e3407f344c9da diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk new file mode 100644 index 0000000000..3a8c3e3e98 --- /dev/null +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk @@ -0,0 +1,1049 @@ +# Copyright (C) 2020-2023 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. 
+ +#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') + +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk + +#------------------------------------------------------------------------------- + +#=== Use bash in the Makefile (https://www.gnu.org/software/make/manual/html_node/Choosing-the-Shell.html) + +SHELL := /bin/bash + +#------------------------------------------------------------------------------- + +#=== Detect O/S and architecture (assuming uname is available, https://en.wikipedia.org/wiki/Uname) + +# Detect O/S kernel (Linux, Darwin...) +UNAME_S := $(shell uname -s) +###$(info UNAME_S='$(UNAME_S)') + +# Detect architecture (x86_64, ppc64le...) +UNAME_P := $(shell uname -p) +###$(info UNAME_P='$(UNAME_P)') + +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../Source/make_opts),) +include ../Source/make_opts +endif + +#------------------------------------------------------------------------------- + +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +MG5AMC_COMMONLIB = mg5amc_common +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) +override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) +override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) +override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else +override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../test +TESTDIRLOCAL = ../test +ifneq ($(wildcard $(GTEST_ROOT)),) +TESTDIR = +else ifneq ($(LOCALGTEST),) +TESTDIR=$(TESTDIRLOCAL) +GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../epochX/cudacpp/CODEGEN),) +TESTDIR = $(TESTDIRCOMMON) +GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else +TESTDIR = +endif +ifneq ($(GTEST_ROOT),) +GTESTLIBDIR = $(GTEST_ROOT)/lib64/ +GTESTLIBS = $(GTESTLIBDIR)/libgtest.a $(GTESTLIBDIR)/libgtest_main.a +GTESTINC = -I$(GTEST_ROOT)/include +else +GTESTLIBDIR = +GTESTLIBS = +GTESTINC 
= +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + +#=== Configure the C++ compiler + +CXXFLAGS = $(OPTFLAGS) -std=c++17 $(INCFLAGS) -Wall -Wshadow -Wextra +ifeq ($(shell $(CXX) --version | grep ^nvc++),) +CXXFLAGS += -ffast-math # see issue #117 +endif +###CXXFLAGS+= -Ofast # performance is not different from --fast-math +###CXXFLAGS+= -g # FOR DEBUGGING ONLY + +# Optionally add debug flags to display the full list of flags (eg on Darwin) +###CXXFLAGS+= -v + +# Note: AR, CXX and FC are implicitly defined if not set externally +# See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html + +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + +#------------------------------------------------------------------------------- + +#=== Configure the GPU compiler (CUDA or HIP) + +# FIXME! (AV 24.01.2024) +# In the current implementation (without separate builds for C++ and CUDA/HIP), we first check for cudacc and hipcc in CUDA_HOME and HIP_HOME. +# If CUDA_HOME or HIP_HOME are not set, try to determine them from the path to cudacc and hipcc. +# While convoluted, this is currently necessary to allow disabling CUDA/HIP builds by setting CUDA_HOME or HIP_HOME to invalid paths. +# This will (probably?) be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775). 
+ +# If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA builds (issue #505) +# This is because it is impossible to pass this to "GPUFLAGS += -ccbin " below +ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache " from outside + $(warning CUDA builds are not supported for multi-word CXX "$(CXX)") + override CUDA_HOME=disabled +endif + +# If CUDA_HOME is not set, try to set it from the path to nvcc +ifndef CUDA_HOME + CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null)) + $(warning CUDA_HOME was not set: using "$(CUDA_HOME)") +endif + +# If HIP_HOME is not set, try to set it from the path to hipcc +ifndef HIP_HOME + HIP_HOME = $(patsubst %%/bin/hipcc,%%,$(shell which hipcc 2>/dev/null)) + $(warning HIP_HOME was not set: using "$(HIP_HOME)") +endif + +# FIXME! (AV 24.01.2024) +# In the current implementation (without separate builds for C++ and CUDA/HIP), +# builds are performed for HIP only if CUDA is not found in the path. +# If both CUDA and HIP are installed, HIP builds can be triggered by unsetting CUDA_HOME. +# This will be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775). + +#--- Option 1: CUDA exists -> use CUDA + +# Set GPUCC as $(CUDA_HOME)/bin/nvcc if it exists +ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) + + GPUCC = $(CUDA_HOME)/bin/nvcc + USE_NVTX ?=-DUSE_NVTX + # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html + # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). + # Embed device code for 70, and PTX for 70+. + # Export MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to use another value or list of values (see #533). + # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). 
+ MADGRAPH_CUDA_ARCHITECTURE ?= 70 + ###CUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 + ###CUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 + comma:=, + CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + CUINC = -I$(CUDA_HOME)/include/ + CUOPTFLAGS = -lineinfo + ###GPUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math + GPUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math + ###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow + ###GPUCC_VERSION = $(shell $(GPUCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1) + GPUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h + # Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 96 # degrades throughput: 4.1E8 (16384 32 12) up to 4.5E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 64 # degrades throughput: 1.7E8 (16384 32 12) flat at 1.7E8 (65536 128 12) + CUBUILDRULEFLAGS = -Xcompiler -fPIC -c + CCBUILDRULEFLAGS = -Xcompiler -fPIC -c -x cu + CUDATESTFLAGS = -lcuda + + # Set the host C++ compiler for GPUCC via 
"-ccbin " + # (NB issue #505: this must be a single word, "clang++ --gcc-toolchain..." is not supported) + GPUFLAGS += -ccbin $(shell which $(subst ccache ,,$(CXX))) + + # Allow newer (unsupported) C++ compilers with older versions of CUDA if ALLOW_UNSUPPORTED_COMPILER_IN_CUDA is set (#504) + ifneq ($(origin ALLOW_UNSUPPORTED_COMPILER_IN_CUDA),undefined) + GPUFLAGS += -allow-unsupported-compiler + endif + +else ifneq ($(origin REQUIRE_CUDA),undefined) + + # If REQUIRE_CUDA is set but no cuda is found, stop here (e.g. for CI tests on GPU #443) + $(error No cuda installation found (set CUDA_HOME or make GPUCC visible in PATH)) + +#--- Option 2: CUDA does not exist, HIP exists -> use HIP + +# Set GPUCC as $(HIP_HOME)/bin/hipcc if it exists +else ifneq ($(wildcard $(HIP_HOME)/bin/hipcc),) + + GPUCC = $(HIP_HOME)/bin/hipcc + #USE_NVTX ?=-DUSE_NVTX # should maybe find something equivalent to this in HIP? + HIPARCHFLAGS = -target x86_64-linux-gnu --offload-arch=gfx90a + HIPINC = -I$(HIP_HOME)/include/ + # Note: -DHIP_FAST_MATH is equivalent to -use_fast_math in HIP + # (but only for single precision line 208: https://rocm-developer-tools.github.io/HIP/hcc__detail_2math__functions_8h_source.html) + # Note: CUOPTFLAGS should not be used for HIP, it had been added here but was then removed (#808) + GPUFLAGS = $(OPTFLAGS) $(INCFLAGS) $(HIPINC) $(HIPARCHFLAGS) -DHIP_FAST_MATH -DHIP_PLATFORM=amd -fPIC + ###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow + GPUFLAGS += -std=c++17 + ###GPUFLAGS+= --maxrregcount 255 # (AV: is this option valid on HIP and meaningful on AMD GPUs?) + CUBUILDRULEFLAGS = -fPIC -c + CCBUILDRULEFLAGS = -fPIC -c -x hip + +else ifneq ($(origin REQUIRE_HIP),undefined) + + # If REQUIRE_HIP is set but no HIP is found, stop here (e.g. 
for CI tests on GPU #443) + $(error No hip installation found (set HIP_HOME or make GPUCC visible in PATH)) + +#--- Option 3: CUDA does not exist, HIP does not exist -> switch off both CUDA and HIP + +else + + # No cudacc and no hipcc: switch CUDA and HIP compilation off and go to common random numbers in C++ + $(warning CUDA_HOME is not set or is invalid: export CUDA_HOME to compile with cuda) + $(warning HIP_HOME is not set or is invalid: export HIP_HOME to compile with hip) + override GPUCC= + override USE_NVTX= + override CUINC= + override HIPINC= + +endif + +# Export GPUCC (so that it can also be used in cudacpp_src.mk?) +export GPUCC +export GPUFLAGS + +#------------------------------------------------------------------------------- + +#=== Configure ccache for C++ and CUDA/HIP builds + +# Enable ccache if USECCACHE=1 +ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) + override CXX:=ccache $(CXX) +endif +#ifeq ($(USECCACHE)$(shell echo $(AR) | grep ccache),1) +# override AR:=ccache $(AR) +#endif +ifneq ($(GPUCC),) + ifeq ($(USECCACHE)$(shell echo $(GPUCC) | grep ccache),1) + override GPUCC:=ccache $(GPUCC) + endif +endif + +#------------------------------------------------------------------------------- + +#=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP + +# PowerPC-specific CXX compiler flags (being reviewed) +ifeq ($(UNAME_P),ppc64le) + CXXFLAGS+= -mcpu=power9 -mtune=power9 # gains ~2-3%% both for none and sse4 + # Throughput references without the extra flags below: none=1.41-1.42E6, sse4=2.15-2.19E6 + ###CXXFLAGS+= -DNO_WARN_X86_INTRINSICS # no change + ###CXXFLAGS+= -fpeel-loops # no change + ###CXXFLAGS+= -funroll-loops # gains ~1%% for none, loses ~1%% for sse4 + ###CXXFLAGS+= -ftree-vectorize # no change + ###CXXFLAGS+= -flto # would increase to none=4.08-4.12E6, sse4=4.99-5.03E6! +else + ###CXXFLAGS+= -flto # also on Intel this would increase throughputs by a factor 2 to 4... 
+ ######CXXFLAGS+= -fno-semantic-interposition # no benefit (neither alone, nor combined with -flto) +endif + +# PowerPC-specific CUDA/HIP compiler flags (to be reviewed!) +ifeq ($(UNAME_P),ppc64le) + GPUFLAGS+= -Xcompiler -mno-float128 +endif + +#------------------------------------------------------------------------------- + +#=== Configure defaults and check if user-defined choices exist for OMPFLAGS, AVX, FPTYPE, HELINL, HRDCOD + +# Set the default OMPFLAGS choice +ifneq ($(findstring hipcc,$(GPUCC)),) +override OMPFLAGS = # disable OpenMP MT when using hipcc #802 +else ifneq ($(shell $(CXX) --version | egrep '^Intel'),) +override OMPFLAGS = -fopenmp +###override OMPFLAGS = # disable OpenMP MT on Intel (was ok without GPUCC but not ok with GPUCC before #578) +else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) +override OMPFLAGS = -fopenmp +###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) +else +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) +endif + +# Set the default AVX (vectorization) choice +ifeq ($(AVX),) + ifeq ($(UNAME_P),ppc64le) + ###override AVX = none + override AVX = sse4 + else ifeq ($(UNAME_P),arm) + ###override AVX = none + override AVX = sse4 + else ifeq ($(wildcard /proc/cpuinfo),) + override AVX = none + $(warning Using AVX='$(AVX)' because host SIMD features cannot be read from /proc/cpuinfo) + else ifeq ($(shell grep -m1 -c avx512vl /proc/cpuinfo)$(shell $(CXX) --version | grep ^clang),1) + override AVX = 512y + ###$(info Using AVX='$(AVX)' as no user input exists) + else + override AVX = avx2 + ifneq ($(shell grep -m1 -c avx512vl /proc/cpuinfo),1) + $(warning Using AVX='$(AVX)' because host does not support avx512vl) + else + $(warning Using AVX='$(AVX)' because this is faster than avx512vl for clang) + endif + endif +else + ###$(info Using AVX='$(AVX)' according to user input) +endif + +# Set the default FPTYPE (floating point type) choice +ifeq ($(FPTYPE),) + override FPTYPE = d +endif + +# Set the default HELINL (inline helicities?) choice +ifeq ($(HELINL),) + override HELINL = 0 +endif + +# Set the default HRDCOD (hardcode cIPD physics parameters?) 
choice +ifeq ($(HRDCOD),) + override HRDCOD = 0 +endif + +# Export AVX, FPTYPE, HELINL, HRDCOD, OMPFLAGS so that it is not necessary to pass them to the src Makefile too +export AVX +export FPTYPE +export HELINL +export HRDCOD +export OMPFLAGS + +#------------------------------------------------------------------------------- + +#=== Configure defaults and check if user-defined choices exist for RNDGEN (legacy!), HASCURAND, HASHIPRAND + +# If the legacy RNDGEN exists, this take precedence over any HASCURAND choice (but a warning is printed out) +###$(info RNDGEN=$(RNDGEN)) +ifneq ($(RNDGEN),) + $(warning Environment variable RNDGEN is no longer supported, please use HASCURAND instead!) + ifeq ($(RNDGEN),hasCurand) + override HASCURAND = $(RNDGEN) + else ifeq ($(RNDGEN),hasNoCurand) + override HASCURAND = $(RNDGEN) + else ifneq ($(RNDGEN),hasNoCurand) + $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported - but use HASCURAND instead!) + endif +endif + +# Set the default HASCURAND (curand random number generator) choice, if no prior choice exists for HASCURAND +# (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) +ifeq ($(HASCURAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASCURAND = hasNoCurand + else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + override HASCURAND = hasCurand + else # non-Nvidia GPU build + override HASCURAND = hasNoCurand + endif +endif + +# Set the default HASHIPRAND (hiprand random number generator) choice, if no prior choice exists for HASHIPRAND +# (NB: allow HASHIPRAND=hasHiprand even if $(GPUCC) does not point to hipcc: assume HIP_HOME was defined correctly...) 
+ifeq ($(HASHIPRAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASHIPRAND = hasNoHiprand + else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build + override HASHIPRAND = hasHiprand + else # non-AMD GPU build + override HASHIPRAND = hasNoHiprand + endif +endif + +# Export HASCURAND, HASHIPRAND so that it is not necessary to pass them to the src Makefile too +# (NB: these variables in cudacpp_src.mk are only used to define the build tag, they are NOT needed for RNDCXXFLAGS or RNDLIBFLAGS) +export HASCURAND +export HASHIPRAND + +#------------------------------------------------------------------------------- + +#=== Set the CUDA/HIP/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD + +# Set the build flags appropriate to OMPFLAGS +$(info OMPFLAGS=$(OMPFLAGS)) +CXXFLAGS += $(OMPFLAGS) + +# Set the build flags appropriate to each AVX choice (example: "make AVX=none") +# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro] +# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] +$(info AVX=$(AVX)) +ifeq ($(UNAME_P),ppc64le) + ifeq ($(AVX),sse4) + override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers) + else ifneq ($(AVX),none) + $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on PowerPC for the moment) + endif +else ifeq ($(UNAME_P),arm) + ifeq ($(AVX),sse4) + override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) + else ifneq ($(AVX),none) + $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on ARM for the moment) + endif +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 + ifeq ($(AVX),none) + override AVXFLAGS = -mno-sse3 # no SIMD + else ifeq ($(AVX),sse4) + override AVXFLAGS = -mno-avx # SSE4.2 with 128 width (xmm registers) + else ifeq ($(AVX),avx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq 
($(AVX),512y) + override AVXFLAGS = -march=skylake -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(AVX),512z) + override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + else + $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) + endif +else + ifeq ($(AVX),none) + override AVXFLAGS = -march=x86-64 # no SIMD (see #588) + else ifeq ($(AVX),sse4) + override AVXFLAGS = -march=nehalem # SSE4.2 with 128 width (xmm registers) + else ifeq ($(AVX),avx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq ($(AVX),512y) + override AVXFLAGS = -march=skylake-avx512 -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(AVX),512z) + override AVXFLAGS = -march=skylake-avx512 -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + else + $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) + endif +endif +# For the moment, use AVXFLAGS everywhere: eventually, use them only in encapsulated implementations? 
+CXXFLAGS+= $(AVXFLAGS) + +# Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") +$(info FPTYPE=$(FPTYPE)) +ifeq ($(FPTYPE),d) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE +else ifeq ($(FPTYPE),f) + CXXFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT +else ifeq ($(FPTYPE),m) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT +else + $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported) +endif + +# Set the build flags appropriate to each HELINL choice (example: "make HELINL=1") +$(info HELINL=$(HELINL)) +ifeq ($(HELINL),1) + CXXFLAGS += -DMGONGPU_INLINE_HELAMPS + GPUFLAGS += -DMGONGPU_INLINE_HELAMPS +else ifneq ($(HELINL),0) + $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported) +endif + +# Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1") +$(info HRDCOD=$(HRDCOD)) +ifeq ($(HRDCOD),1) + CXXFLAGS += -DMGONGPU_HARDCODE_PARAM + GPUFLAGS += -DMGONGPU_HARDCODE_PARAM +else ifneq ($(HRDCOD),0) + $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) +endif + + +#=== Set the CUDA/HIP/C++ compiler and linker flags appropriate to user-defined choices of HASCURAND, HASHIPRAND + +$(info HASCURAND=$(HASCURAND)) +$(info HASHIPRAND=$(HASHIPRAND)) +override RNDCXXFLAGS= +override RNDLIBFLAGS= + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASCURAND choice (example: "make HASCURAND=hasNoCurand") +ifeq ($(HASCURAND),hasNoCurand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_CURAND +else ifeq ($(HASCURAND),hasCurand) + override RNDLIBFLAGS += -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+else + $(error Unknown HASCURAND='$(HASCURAND)': only 'hasCurand' and 'hasNoCurand' are supported) +endif + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASHIPRAND choice (example: "make HASHIPRAND=hasNoHiprand") +ifeq ($(HASHIPRAND),hasNoHiprand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_HIPRAND +else ifeq ($(HASHIPRAND),hasHiprand) + override RNDLIBFLAGS += -L$(HIP_HOME)/lib/ -lhiprand +else ifneq ($(HASHIPRAND),hasHiprand) + $(error Unknown HASHIPRAND='$(HASHIPRAND)': only 'hasHiprand' and 'hasNoHiprand' are supported) +endif + +#$(info RNDCXXFLAGS=$(RNDCXXFLAGS)) +#$(info HASHIPRAND=$(HASHIPRAND)) + +#------------------------------------------------------------------------------- + +#=== Configure build directories and build lockfiles === + +# Build directory "short" tag (defines target and path to the optional build directory) +# (Rationale: keep directory names shorter, e.g. do not include random number generator choice) +override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) + +# Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) +# (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) +override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASHIPRAND) + +# Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 +ifeq ($(USEBUILDDIR),1) + override BUILDDIR = build.$(DIRTAG) + override LIBDIR = ../lib/$(BUILDDIR) + override LIBDIRRPATH = '$$ORIGIN/../$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is set = 1)) +else + override BUILDDIR = . 
+ override LIBDIR = ../lib + override LIBDIRRPATH = '$$ORIGIN/$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is not set)) +endif +###override INCDIR = ../../include +###$(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG)) + +# On Linux, set rpath to LIBDIR to make it unnecessary to use LD_LIBRARY_PATH +# Use relative paths with respect to the executables or shared libraries ($ORIGIN on Linux) +# On Darwin, building libraries with absolute paths in LIBDIR makes this unnecessary +ifeq ($(UNAME_S),Darwin) + override CXXLIBFLAGSRPATH = + override CULIBFLAGSRPATH = + override CXXLIBFLAGSRPATH2 = + override CULIBFLAGSRPATH2 = +else + # RPATH to cuda/cpp libs when linking executables + override CXXLIBFLAGSRPATH = -Wl,-rpath=$(LIBDIRRPATH) + override CULIBFLAGSRPATH = -Xlinker -rpath=$(LIBDIRRPATH) + # RPATH to common lib when linking cuda/cpp libs + override CXXLIBFLAGSRPATH2 = -Wl,-rpath='$$ORIGIN' + override CULIBFLAGSRPATH2 = -Xlinker -rpath='$$ORIGIN' +endif + +# Setting LD_LIBRARY_PATH or DYLD_LIBRARY_PATH in the RUNTIME is no longer necessary (neither on Linux nor on Mac) +override RUNTIME = + +#=============================================================================== +#=== Makefile TARGETS and build rules below +#=============================================================================== + + +.PHONY: all $(DIRS) + +# Assuming DIRS is defined as before +DIRS := $(wildcard P*) + +# Construct the library paths +rwgtlib := $(addprefix ,$(addsuffix /librwgt.a,$(DIRS))) + +cxx_rwgt=$(BUILDDIR)/rwgt.exe +ifneq ($(GPUCC),) +cu_rwgt=$(BUILDDIR)/grwgt.exe +grwgtlib := $(addprefix $(DIRS)/,libgrwgt.a) +else +cu_rwgt= +grwgtlib= +endif +ifneq ($(GTESTLIBS),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_rwgt) $(cxx_rwgt) +else +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_rwgt) $(cxx_rwgt) +endif + +# Target (and build options): debug +MAKEDEBUG= +debug: OPTFLAGS 
= -g -O0 +debug: CUOPTFLAGS = -G +debug: MAKEDEBUG := debug +debug: all.$(TAG) + +# Target: tag-specific build lockfiles +override oldtagsb=`if [ -d $(BUILDDIR) ]; then find $(BUILDDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)' -exec echo $(shell pwd)/{} \; ; fi` +$(BUILDDIR)/.build.$(TAG): + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi + @touch $(BUILDDIR)/.build.$(TAG) + +# Generic target and build rules: objects from CUDA or HIP compilation +# NB: CCBUILDRULEFLAGS includes "-x cu" for nvcc and "-x hip" for hipcc (#810) +ifneq ($(GPUCC),) +$(BUILDDIR)/%%.o : %%.cu *.h ../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(GPUCC) $(CPPFLAGS) $(GPUFLAGS) $(CUBUILDRULEFLAGS) $< -o $@ + +$(BUILDDIR)/%%_cu.o : %%.cc *.h ../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(GPUCC) $(CPPFLAGS) $(GPUFLAGS) $(CCBUILDRULEFLAGS) $< -o $@ +endif + +# Generic target and build rules: objects from C++ compilation +# (NB do not include CUINC here! add it only for NVTX or curand #679) +$(BUILDDIR)/%%.o : %%.cc *.h ../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -fPIC -c $< -o $@ + +# Apply special build flags only to CrossSectionKernel[_cu].o (no fast math, see #117 and #516) +# Added edgecase for HIP compilation +ifeq ($(shell $(CXX) --version | grep ^nvc++),) +$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math +ifeq ($(findstring nvcc,$(GPUCC)),nvcc) + $(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -Xcompiler -fno-fast-math +else + $(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -fno-fast-math +endif +endif + +# # Apply special build flags only to check_sa[_cu].o (NVTX in timermap.h, #679) +# $(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) +# $(BUILDDIR)/check_sa_cu.o: CXXFLAGS += $(USE_NVTX) $(CUINC) + +# # Apply special build flags only to check_sa[_cu].o and (Cu|Hip)randRandomNumberKernel[_cu].o +# $(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/check_sa_cu.o: CUFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/CurandRandomNumberKernel_cu.o: CUFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/HiprandRandomNumberKernel_cu.o: CUFLAGS += $(RNDCXXFLAGS) +# ifeq ($(HASCURAND),hasCurand) # curand headers, #679 +# $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) +# endif +# ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers +# $(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIPINC) +# endif + +# Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... 
instead" in GPUCC with icx2023 (#592) +ifneq ($(shell $(CXX) --version | egrep '^(Intel)'),) +ifneq ($(GPUCC),) +GPUFLAGS += -Wno-deprecated-builtins +endif +endif + +# Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) +# This patch does remove the warning, but I prefer to keep it disabled for the moment... +###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) +###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###ifneq ($(GPUCC),) +###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -Xcompiler -Wno-overriding-t-option +###endif +###endif + +#### Apply special build flags only to CPPProcess.o (-flto) +###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto + +#### Apply special build flags only to CPPProcess.o (AVXFLAGS) +###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) + +#------------------------------------------------------------------------------- + +# Target (and build rules): common (src) library +commonlib : $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so + +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../src/*.h ../src/*.cc $(BUILDDIR)/.build.$(TAG) + $(MAKE) -C ../src $(MAKEDEBUG) -f $(CUDACPP_SRC_MAKEFILE) + +#------------------------------------------------------------------------------- + +#processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +#MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp +#cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o +#cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o + +#ifneq ($(GPUCC),) +#MG5AMC_CULIB = mg5amc_$(processid_short)_cuda +#cu_objects_lib=$(BUILDDIR)/CPPProcess_cu.o $(BUILDDIR)/MatrixElementKernels_cu.o $(BUILDDIR)/BridgeKernels_cu.o $(BUILDDIR)/CrossSectionKernels_cu.o +#cu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cu.o 
$(BUILDDIR)/RamboSamplingKernels_cu.o +#endif + +# Target (and build rules): C++ and CUDA shared libraries +#$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o +#$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +#$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so +# $(CXX) -shared -o $@ $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) + +ifneq ($(GPUCC),) +#$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o +#$(LIBDIR)/lib$(MG5AMC_CULIB).so: cu_objects_lib += $(BUILDDIR)/fbridge_cu.o +$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so + $(GPUCC) --shared -o $@ $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# Bypass std::filesystem completely to ease portability on LUMI #803 +ifneq ($(findstring hipcc,$(GPUCC)),) + $(GPUCC) --shared -o $@ $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs +else + $(GPUCC) --shared -o $@ $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +endif +endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): Fortran include files +###$(INCDIR)/%%.inc : ../%%.inc +### @if [ ! 
-d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi +### \cp $< $@ + +#------------------------------------------------------------------------------- + +#HERE LOOP MAKE OVER P DIRECTORIES AND ADD RWGT_RUNNER_LIBS +# Ensure each librwgt.a depends on its directory being built +$(rwgtlib): + @$(MAKE) -C $(@D) VARIABLE=true + +# Target (and build rules): C++ and CUDA standalone executables +$(cxx_rwgt): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_rwgt): $(BUILDDIR)/rwgt_driver.o $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(rwgtlib) + $(CXX) -o $@ $(BUILDDIR)/rwgt_driver.o $(rwgtlib) $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) + +ifneq ($(GPUCC),) +ifneq ($(shell $(CXX) --version | grep ^Intel),) +$(cu_rwgt): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(cu_rwgt): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 +$(cu_rwgt): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +endif +$(cu_rwgt): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cu_rwgt): rwgtlibs $(BUILDDIR)/grwgt.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(DIRS) + $(GPUCC) -o $@ $(BUILDDIR)/grwgt.o $(grwgtlib) $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) +endif + +#------------------------------------------------------------------------------- + +# Generic target and build rules: objects from Fortran compilation +#$(BUILDDIR)/%%.o : %%.f *.inc +# @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi +# $(FC) -I. -c $< -o $@ + +# Generic target and build rules: objects from Fortran compilation +###$(BUILDDIR)/%%.o : %%.f *.inc +### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi +### @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi +### $(FC) -I. -I$(INCDIR) -c $< -o $@ + +# Target (and build rules): Fortran standalone executables +###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc + +#ifeq ($(UNAME_S),Darwin) +#$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +#endif +#$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +#$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +#ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 +# $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ +#else +# $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) +#endif + +# ifneq ($(GPUCC),) +# ifneq ($(shell $(CXX) --version | grep ^Intel),) +# $(fcu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +# $(fcu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +# endif +# ifeq ($(UNAME_S),Darwin) +# $(fcu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +# endif +# $(fcu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +# $(fcu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) +# ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 +# $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) 
-print-prog-name=clang))/../../lib -lamdhip64 +# else +# $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) +# endif +# endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): test objects and test executable +# $(BUILDDIR)/testxxx.o: $(GTESTLIBS) +# $(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) +# $(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt +# $(testmain): $(BUILDDIR)/testxxx.o +# $(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions + +# ifneq ($(GPUCC),) +# $(BUILDDIR)/testxxx_cu.o: $(GTESTLIBS) +# $(BUILDDIR)/testxxx_cu.o: INCFLAGS += $(GTESTINC) +# $(BUILDDIR)/testxxx_cu.o: testxxx_cc_ref.txt +# $(testmain): $(BUILDDIR)/testxxx_cu.o +# $(testmain): cu_objects_exe += $(BUILDDIR)/testxxx_cu.o # Comment out this line to skip the CUDA test of xxx functions +# endif + +# $(BUILDDIR)/testmisc.o: $(GTESTLIBS) +# $(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) +# $(testmain): $(BUILDDIR)/testmisc.o +# $(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests + +# ifneq ($(GPUCC),) +# $(BUILDDIR)/testmisc_cu.o: $(GTESTLIBS) +# $(BUILDDIR)/testmisc_cu.o: INCFLAGS += $(GTESTINC) +# $(testmain): $(BUILDDIR)/testmisc_cu.o +# $(testmain): cu_objects_exe += $(BUILDDIR)/testmisc_cu.o # Comment out this line to skip the CUDA miscellaneous tests +# endif + +# $(BUILDDIR)/runTest.o: $(GTESTLIBS) +# $(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) +# $(testmain): $(BUILDDIR)/runTest.o +# $(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o + +# ifneq ($(GPUCC),) +# $(BUILDDIR)/runTest_cu.o: $(GTESTLIBS) +# $(BUILDDIR)/runTest_cu.o: INCFLAGS += $(GTESTINC) +# ifneq ($(shell $(CXX) --version | grep ^Intel),) +# $(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to 
`_intel_fast_memcpy') +# $(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +# else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 +# $(testmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +# endif +# $(testmain): $(BUILDDIR)/runTest_cu.o +# $(testmain): cu_objects_exe += $(BUILDDIR)/runTest_cu.o +# endif + +# $(testmain): $(GTESTLIBS) +# $(testmain): INCFLAGS += $(GTESTINC) +# $(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -lgtest_main + +# ifneq ($(OMPFLAGS),) +# ifneq ($(shell $(CXX) --version | egrep '^Intel'),) +# $(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +# else ifneq ($(shell $(CXX) --version | egrep '^clang'),) +# $(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +# ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +# ###$(testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +# else +# $(testmain): LIBFLAGS += -lgomp +# endif +# endif + +# # Bypass std::filesystem completely to ease portability on LUMI #803 +# #ifneq ($(findstring hipcc,$(GPUCC)),) +# #$(testmain): LIBFLAGS += -lstdc++fs +# #endif + +# ifeq ($(GPUCC),) # link only runTest.o +# $(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +# $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +# $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) +# else # link both runTest.o and runTest_cu.o +# $(testmain): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +# $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) $(GTESTLIBS) +# ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 +# $(FC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) $(CUDATESTFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 +# else +# $(GPUCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) $(CUDATESTFLAGS) +# endif +# endif + +# # Use target gtestlibs to build only googletest +# ifneq ($(GTESTLIBS),) +# gtestlibs: $(GTESTLIBS) +# endif + +# # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 +# $(GTESTLIBS): +# ifneq ($(shell which flock 2>/dev/null),) +# @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi +# flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) +# else +# if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi +# endif + +#------------------------------------------------------------------------------- + +# Target: build all targets in all AVX modes (each AVX mode in a separate build directory) +# Split the avxall target into five separate targets to allow parallel 'make -j avxall' builds +# (Hack: add a fbridge.inc dependency to avxall, to ensure it is only copied once for all AVX modes) +avxnone: + @echo + $(MAKE) USEBUILDDIR=1 AVX=none -f $(CUDACPP_MAKEFILE) + +avxsse4: + @echo + $(MAKE) USEBUILDDIR=1 AVX=sse4 -f $(CUDACPP_MAKEFILE) + +avxavx2: + @echo + $(MAKE) USEBUILDDIR=1 AVX=avx2 -f $(CUDACPP_MAKEFILE) + +avx512y: + @echo + $(MAKE) USEBUILDDIR=1 AVX=512y -f $(CUDACPP_MAKEFILE) + +avx512z: + @echo + $(MAKE) USEBUILDDIR=1 AVX=512z -f $(CUDACPP_MAKEFILE) + +ifeq ($(UNAME_P),ppc64le) +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 +avxall: avxnone avxsse4 +else ifeq ($(UNAME_P),arm) +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 +avxall: avxnone avxsse4 +else +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 avxavx2 avx512y avx512z +avxall: avxnone avxsse4 avxavx2 avx512y avx512z +endif + +#------------------------------------------------------------------------------- + +# Target: clean the builds +.PHONY: clean + +clean: +ifeq ($(USEBUILDDIR),1) + rm -rf $(BUILDDIR) +else + rm -f $(BUILDDIR)/.build.* $(BUILDDIR)/*.o $(BUILDDIR)/*.exe + rm -f $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(LIBDIR)/lib$(MG5AMC_CULIB).so +endif + $(MAKE) -C ../src clean -f $(CUDACPP_SRC_MAKEFILE) +### rm -rf $(INCDIR) + +cleanall: + @echo + $(MAKE) USEBUILDDIR=0 clean -f $(CUDACPP_MAKEFILE) + @echo + $(MAKE) USEBUILDDIR=0 -C ../src cleanall -f $(CUDACPP_SRC_MAKEFILE) + rm -rf build.* + +# Target: clean the builds as well as the gtest installation(s) +distclean: cleanall +ifneq ($(wildcard 
$(TESTDIRCOMMON)),) + $(MAKE) -C $(TESTDIRCOMMON) clean +endif + $(MAKE) -C $(TESTDIRLOCAL) clean + +#------------------------------------------------------------------------------- + +# Target: show system and compiler information +info: + @echo "" + @uname -spn # e.g. Linux nodename.cern.ch x86_64 +ifeq ($(UNAME_S),Darwin) + @sysctl -a | grep -i brand + @sysctl -a | grep machdep.cpu | grep features || true + @sysctl -a | grep hw.physicalcpu: + @sysctl -a | grep hw.logicalcpu: +else + @cat /proc/cpuinfo | grep "model name" | sort -u + @cat /proc/cpuinfo | grep "flags" | sort -u + @cat /proc/cpuinfo | grep "cpu cores" | sort -u + @cat /proc/cpuinfo | grep "physical id" | sort -u +endif + @echo "" +ifneq ($(shell which nvidia-smi 2>/dev/null),) + nvidia-smi -L + @echo "" +endif + @echo USECCACHE=$(USECCACHE) +ifeq ($(USECCACHE),1) + ccache --version | head -1 +endif + @echo "" + @echo GPUCC=$(GPUCC) +ifneq ($(GPUCC),) + $(GPUCC) --version +endif + @echo "" + @echo CXX=$(CXX) +ifneq ($(shell $(CXX) --version | grep ^clang),) + @echo $(CXX) -v + @$(CXX) -v |& egrep -v '(Found|multilib)' + @readelf -p .comment `$(CXX) -print-libgcc-file-name` |& grep 'GCC: (GNU)' | grep -v Warning | sort -u | awk '{print "GCC toolchain:",$$5}' +else + $(CXX) --version +endif + @echo "" + @echo FC=$(FC) + $(FC) --version + +#------------------------------------------------------------------------------- + +# Target: check (run the C++ test executable) +# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] 
+ifneq ($(GPUCC),) +check: runTest cmpFcheck cmpFGcheck +else +check: runTest cmpFcheck +endif + +# Target: runTest (run the C++ test executable runTest.exe) +runTest: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/runTest.exe + +# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +runCheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 + +# Target: runGcheck (run the CUDA standalone executable gcheck.exe, with a small number of events) +runGcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 + +# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +runFcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 + +# Target: runFGcheck (run the Fortran standalone executable - with CUDA MEs - fgcheck.exe, with a small number of events) +runFGcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 + +# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +cmpFcheck: all.$(TAG) + @echo + @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi + +# Target: cmpFGcheck (compare ME results from the CUDA and Fortran with CUDA MEs standalone executables, with a small number of events) +cmpFGcheck: all.$(TAG) + @echo + @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/CUDA) = $${me1}"; echo "Avg ME (F77/CUDA) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/CUDA) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/CUDA) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi + +# Target: memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) +memcheck: all.$(TAG) + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + +#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk similarity index 98% rename from epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk rename to epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk index efe82df88d..2c5f8509bb 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk @@ -571,21 +571,24 @@ override RUNTIME = cxx_main=$(BUILDDIR)/check.exe fcxx_main=$(BUILDDIR)/fcheck.exe +cxx_rwgtlib=$(BUILDDIR)/librwgt.a ifneq ($(GPUCC),) cu_main=$(BUILDDIR)/gcheck.exe fcu_main=$(BUILDDIR)/fgcheck.exe +cu_rwgtlib=$(BUILDDIR)/libgrwgt.a else cu_main= fcu_main= +cu_rwgtlib= endif testmain=$(BUILDDIR)/runTest.exe ifneq ($(GTESTLIBS),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) $(testmain) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) $(cu_rwgtlib) $(cxx_rwgtlib) $(testmain) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) $(cu_rwgtlib) $(cxx_rwgtlib) endif # Target (and build options): debug @@ -728,6 +731,11 @@ $(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PAT $(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) 
$(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +# Target (and build rules): C++ and CUDA rwgt libraries +cxx_rwgtfiles := $(BUILDDIR)/rwgt_runner.o $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(cxx_objects_exe) +$(cxx_rwgtlib): $(cxx_rwgtfiles) + ar rcs $@ $^ + ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) $(cu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') @@ -738,6 +746,9 @@ endif $(cu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cu_main): $(BUILDDIR)/check_sa_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_cu.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(RNDLIBFLAGS) +cu_rwgtfiles := $(BUILDDIR)/grwgt_runner.o $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(cu_objects_exe) +$(cu_rwgtlib): $(cu_rwgtfiles) + ar rcs $@ $^ endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index dad73e6a6e..ceab1e210c 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -2041,19 +2041,36 @@ def get_rwgt_includes(self): """Return string with the include directives for the REX reweighting""" return "#include \"P%d_%s/rwgt_runner.cc\"" % (self.process_number, self.process_name) + def write_rwgt_header(self): + """Writes a simple rwgt_runner.h file to forward declare the runner object""" + # Adjust the placeholders for use with 
`.format()` + rwgt_h = """#ifndef {namespace}_RWGT_RUNNER_H + #define {namespace}_RWGT_RUNNER_H + #include \"teawREX.hpp\" + #include \"rwgt_instance.h\" + namespace {namespace} {{ + extern rwgt::instance runner; + }} + #endif""".format(namespace=self.get_proc_dir()) + + # Using `with` statement for better file handling + with open(os.path.join(self.path, 'rwgt_runner.h'), 'w') as ff: + ff.write(rwgt_h) + def edit_rwgt_runner(self): """Create the rwgt_runner.cc file for the REX reweighting""" ###misc.sprint('Entering PLUGIN_OneProcessExporterRwgt.edit_rwgt_runner') # Create the rwgt_runner.cc file # replace_dict = {} replace_dict = super().get_process_class_definitions(write=False) - rwgt_runner = self.get_proc_dir() + self.rwgt_template +# rwgt_runner = self.get_proc_dir() + self.rwgt_template replace_dict['process_namespace'] = self.get_proc_dir() replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() replace_dict['init_prt_ids'] = self.get_init_prts_vec(self.matrix_elements[0].get('processes')[0]) replace_dict['fin_prt_ids'] = self.get_fin_prts_vec(self.matrix_elements[0].get('processes')[0]) replace_dict['process_event'] = self.get_rwgt_legs(self.matrix_elements[0].get('processes')[0]) template = open(pjoin(self.template_path,'REX', 'rwgt_runner.inc'),'r').read() + self.write_rwgt_header() ff = open(pjoin(self.path, 'rwgt_runner.cc'),'w') ff.write(template % replace_dict) ff.close() diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index 229a7dac94..f8264c8f93 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -372,12 +372,15 @@ class RWGT_ProcessExporter(PLUGIN_ProcessExporter): s+'gpu/MadgraphTest.h', s+'gpu/runTest.cc', s+'gpu/testmisc.cc', s+'gpu/testxxx_cc_ref.txt', s+'gpu/perf.py', s+'gpu/profile.sh', - s+'CMake/SubProcesses/CMakeLists.txt'], + 
s+'CMake/SubProcesses/CMakeLists.txt', + s+'gpu/cudacpp_rex_driver.mk', + s+'REX/rwgt_instance.h', s+'REX/REX.hpp', s+'REX/teawREX.hpp'], 'test': [s+'gpu/cudacpp_test.mk']} - from_template['SubProcesses'].append(s+'REX/rwgt_instance.h') - from_template['SubProcesses'].append(s+'REX/REX.hpp') - from_template['SubProcesses'].append(s+'REX/teawREX.hpp') +# from_template['SubProcesses'].append(s+'REX/rwgt_instance.h') +# from_template['SubProcesses'].append(s+'REX/REX.hpp') +# from_template['SubProcesses'].append(s+'REX/teawREX.hpp') +# from_template['SubProcesses'].append(s+'gpu/cudacpp_rex_driver.mk') to_link_in_P = ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', @@ -401,13 +404,14 @@ class RWGT_ProcessExporter(PLUGIN_ProcessExporter): 'testxxx.cc', # this is generated from a template in Subprocesses but we still link it in P1 'MemoryBuffers.h', # this is generated from a template in Subprocesses but we still link it in P1 'MemoryAccessCouplings.h', # this is generated from a template in Subprocesses but we still link it in P1 - 'perf.py', 'profile.sh'] + 'perf.py', 'profile.sh', + 'rwgt_instance.h', 'REX.hpp', 'teawREX.hpp'] - to_link_in_P.append('rwgt_instance.h') - to_link_in_P.append('REX.hpp') - to_link_in_P.append('teawREX.hpp') +# to_link_in_P.append('rwgt_instance.h') +# to_link_in_P.append('REX.hpp') +# to_link_in_P.append('teawREX.hpp') - template_Sub_make = pjoin(PLUGINDIR, 'madgraph', 'iolibs', 'template_files','gpu','cudacpp_rex.mk') + template_Sub_make = pjoin(PLUGINDIR, 'madgraph', 'iolibs', 'template_files','gpu','cudacpp_rex_runner.mk') # def generate_subprocess_directory(self, subproc_group, fortran_model, me=None): # misc.sprint('Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory)') @@ -457,7 +461,7 @@ def export_driver(self): replace_dict['include_lines'] = '' replace_dict['run_set'] = '' for name in self.rwgt_names: - replace_dict['include_lines'] += '#include 
"%s/rwgt_runner.cc"\n' % name + replace_dict['include_lines'] += '#include "%s/rwgt_runner.h"\n' % name replace_dict['run_set'] += '%s::runner,' % name replace_dict['run_set'] = replace_dict['run_set'][:-1] template_path = os.path.join( PLUGINDIR, 'madgraph', 'iolibs', 'template_files' ) diff --git a/tools/REX/rwgt_driver.cc b/tools/REX/rwgt_driver.cc index f4c6ab927f..4fe4023730 100644 --- a/tools/REX/rwgt_driver.cc +++ b/tools/REX/rwgt_driver.cc @@ -91,7 +91,7 @@ int main( int argc, char** argv ){ // ZW : include rwgt_instances(s) std::vector runSet = {%(run_set)s}; - std::vector runSet; +// std::vector runSet; REX::teaw::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); fileCol.initCards(); diff --git a/tools/REX/rwgt_instance.h b/tools/REX/rwgt_instance.h index 374810a1aa..e87219b001 100644 --- a/tools/REX/rwgt_instance.h +++ b/tools/REX/rwgt_instance.h @@ -17,6 +17,8 @@ namespace rwgt{ + using FORTRANFPTYPE = double; + //ZW: Function for calculating the number of remaining events in a warp // in order to pad the input arrays to a multiple of the warp size unsigned int warpRemain( unsigned int nEvt, unsigned int nWarp = 32 ){ diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index 689daabcdd..14d2dfdc79 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -14,10 +14,10 @@ #include "teawREX.hpp" #include "rwgt_instance.h" +#include "fbridge.cc" // ZW: SET UP NAMESPACE namespace %(process_namespace)s{ -#include "fbridge.cc" //namespace dummy{ struct fbridgeRunner{ @@ -111,7 +111,7 @@ namespace %(process_namespace)s{ // ZW: SET UP INPUT LHE BLOCK // ZW: SET UP REX::event FROM LHE BLOCK // auto procEvent = REX::event( procEvent ); - REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; +// REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; std::vector> eventVec = {%(process_event)s}; REX::event locEv = 
REX::event( eventVec ); diff --git a/tools/REX/teawREX.hpp b/tools/REX/teawREX.hpp index 2c3c7ec7d1..971b563f82 100644 --- a/tools/REX/teawREX.hpp +++ b/tools/REX/teawREX.hpp @@ -27,10 +27,6 @@ #include #include "REX.hpp" -#ifndef FORTRANFPTYPE -#define FORTRANFPTYPE double -#endif - namespace REX::teaw { From fb7a2546550ba087175f7b4ebdace72504c00ac3 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Tue, 9 Apr 2024 14:00:08 +0200 Subject: [PATCH 08/76] separated REX into header and implementation, fixed compilation of P-directories into shared libraries that are linked to rwgt driver --- MG5aMC/mg5amcnlo | 2 +- .../template_files/gpu/cudacpp_rex_driver.mk | 17 +- .../template_files/gpu/cudacpp_rex_runner.mk | 12 +- .../iolibs/template_files/gpu/cudacpp_src.mk | 4 +- .../CUDACPP_SA_OUTPUT/model_handling.py | 3 +- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 9 +- tools/REX/REX.cc | 1 + tools/REX/REX.h | 830 ++++++++ tools/REX/REX.hpp | 1181 ++++------- tools/REX/rwgt_driver.cc | 13 +- tools/REX/rwgt_instance.cc | 78 + tools/REX/rwgt_instance.h | 53 +- tools/REX/rwgt_runner.cc | 6 +- tools/REX/teawREX.cc | 1 + tools/REX/teawREX.h | 188 ++ tools/REX/teawREX.hpp | 218 +- tools/REX/tester.cpp | 19 +- tools/REX/unweighted_events.lhe | 1870 ----------------- 18 files changed, 1695 insertions(+), 2810 deletions(-) create mode 120000 tools/REX/REX.cc create mode 100644 tools/REX/REX.h create mode 100644 tools/REX/rwgt_instance.cc create mode 120000 tools/REX/teawREX.cc create mode 100644 tools/REX/teawREX.h delete mode 100644 tools/REX/unweighted_events.lhe diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index 9d809a7a3d..3e4903eb5c 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit 9d809a7a3d149298ec5d77bacd9e3407f344c9da +Subproject commit 3e4903eb5cdcd852339e47ddaf2c741bb644498e diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk index 3a8c3e3e98..9889da9575 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk @@ -572,25 +572,24 @@ override RUNTIME = .PHONY: all $(DIRS) -# Assuming DIRS is defined as before DIRS := $(wildcard P*) + # Construct the library paths -rwgtlib := $(addprefix ,$(addsuffix /librwgt.a,$(DIRS))) +cxx_proclibs := $(shell for dir in $(DIRS); do basename $$dir | awk -F_ '{print "mg5amc_"$$(NF-1)"_"$$NF"_cpp"}'; done) +rwgtlib := $(addprefix ,$(addsuffix /librwgt.so,$(DIRS))) cxx_rwgt=$(BUILDDIR)/rwgt.exe ifneq ($(GPUCC),) cu_rwgt=$(BUILDDIR)/grwgt.exe -grwgtlib := $(addprefix $(DIRS)/,libgrwgt.a) +grwgtlib := $(addprefix $(DIRS)/,libgrwgt.so) +cu_proclibs := $(shell for dir in $(DIRS); do basename $$dir | awk -F_ '{print "mg5amc_"$$(NF-1)"_"$$NF"_cuda"}'; done) else cu_rwgt= grwgtlib= +cu_proclibs= endif -ifneq ($(GTESTLIBS),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_rwgt) $(cxx_rwgt) -else all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_rwgt) $(cxx_rwgt) -endif # Target (and build options): debug MAKEDEBUG= @@ -735,7 +734,7 @@ $(rwgtlib): # Target (and build rules): C++ and CUDA standalone executables $(cxx_rwgt): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cxx_rwgt): $(BUILDDIR)/rwgt_driver.o $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(rwgtlib) - $(CXX) -o $@ $(BUILDDIR)/rwgt_driver.o $(rwgtlib) $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) + $(CXX) -o $@ $(BUILDDIR)/rwgt_driver.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -l$(cxx_proclibs) $(rwgtlib) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) @@ -746,7 +745,7 @@ $(cu_rwgt): 
LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) endif $(cu_rwgt): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cu_rwgt): rwgtlibs $(BUILDDIR)/grwgt.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(DIRS) - $(GPUCC) -o $@ $(BUILDDIR)/grwgt.o $(grwgtlib) $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) + $(GPUCC) -o $@ $(BUILDDIR)/grwgt.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) -l$(cu_proclibs) $(grwgtlib) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk index 2c5f8509bb..80fbe5b8e7 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk @@ -571,12 +571,12 @@ override RUNTIME = cxx_main=$(BUILDDIR)/check.exe fcxx_main=$(BUILDDIR)/fcheck.exe -cxx_rwgtlib=$(BUILDDIR)/librwgt.a +cxx_rwgtlib=$(BUILDDIR)/librwgt.so ifneq ($(GPUCC),) cu_main=$(BUILDDIR)/gcheck.exe fcu_main=$(BUILDDIR)/fgcheck.exe -cu_rwgtlib=$(BUILDDIR)/libgrwgt.a +cu_rwgtlib=$(BUILDDIR)/libgrwgt.so else cu_main= fcu_main= @@ -733,8 +733,8 @@ $(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objec # Target (and build rules): C++ and CUDA rwgt libraries cxx_rwgtfiles := $(BUILDDIR)/rwgt_runner.o $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(cxx_objects_exe) -$(cxx_rwgtlib): $(cxx_rwgtfiles) - ar rcs $@ $^ +$(cxx_rwgtlib): $(cxx_rwgtfiles) $(cxx_objects_lib) + $(CXX) -shared -o $@ $(cxx_rwgtfiles) $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) @@ -747,8 +747,8 @@ 
$(cu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cu_main): $(BUILDDIR)/check_sa_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_cu.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(RNDLIBFLAGS) cu_rwgtfiles := $(BUILDDIR)/grwgt_runner.o $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(cu_objects_exe) -$(cu_rwgtlib): $(cu_rwgtfiles) - ar rcs $@ $^ +$(cu_rwgtlib): $(cu_rwgtfiles) $(cu_objects_lib) + $(GPUCC) -shared -o $@ $(cu_objects_lib) $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk index f1e79433b1..1edd644dec 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk @@ -257,9 +257,9 @@ $(BUILDDIR)/%%_cu.o : %%.cc *.h $(BUILDDIR)/.build.$(TAG) #------------------------------------------------------------------------------- -cxx_objects=$(addprefix $(BUILDDIR)/, Parameters_%(model)s.o read_slha.o) +cxx_objects=$(addprefix $(BUILDDIR)/, Parameters_%(model)s.o read_slha.o teawREX.o rwgt_instance.o) ifneq ($(GPUCC),) -cu_objects=$(addprefix $(BUILDDIR)/, Parameters_%(model)s_cu.o) +cu_objects=$(addprefix $(BUILDDIR)/, Parameters_%(model)s_cu.o teawREX_cu.o rwgt_instance_cu.o) endif # Target (and build rules): common (src) library diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index ceab1e210c..c31fe9c6c1 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -2003,7 +2003,7 @@ class PLUGIN_OneProcessExporterRwgt(PLUGIN_OneProcessExporter): # ZW - rwgt functions def get_rwgt_legs(self, process): """Return string with particle ids and status in the REX std::pair format""" - return ",".join(["{%i,%i}" % (leg.get('state'), leg.get('id')) \ + return ",".join(["{\"%i\",\"%i\"}" % (leg.get('state'), leg.get('id')) \ for leg in process.get('legs')]).replace('0', '-1') def get_init_prts_vec(self, process): @@ -2046,7 +2046,6 @@ def write_rwgt_header(self): # Adjust the placeholders for use with `.format()` rwgt_h = """#ifndef {namespace}_RWGT_RUNNER_H #define {namespace}_RWGT_RUNNER_H - #include \"teawREX.hpp\" #include \"rwgt_instance.h\" namespace {namespace} {{ extern rwgt::instance runner; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index f8264c8f93..6c6c16523e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -352,7 +352,10 @@ class RWGT_ProcessExporter(PLUGIN_ProcessExporter): 'CMake': [s+'CMake/Compilers.txt', s+'CMake/Platforms.txt', s+'CMake/Macros.txt'], 'src': [s+'gpu/rambo.h', s+'read_slha.h', s+'read_slha.cc', s+'gpu/mgOnGpuFptypes.h', s+'gpu/mgOnGpuCxtypes.h', s+'gpu/mgOnGpuVectors.h', - s+'CMake/src/CMakeLists.txt' ], + s+'CMake/src/CMakeLists.txt', + s+'REX/REX.cc', s+'REX/teawREX.cc', + s+'REX/REX.h', s+'REX/teawREX.h', + s+'REX/rwgt_instance.h', s+'REX/rwgt_instance.cc'], 'SubProcesses': [s+'gpu/nvtx.h', s+'gpu/timer.h', s+'gpu/timermap.h', s+'gpu/ompnumthreads.h', s+'gpu/GpuRuntime.h', s+'gpu/GpuAbstraction.h', s+'gpu/MemoryAccessHelpers.h', s+'gpu/MemoryAccessVectors.h', @@ -374,7 +377,7 @@ 
class RWGT_ProcessExporter(PLUGIN_ProcessExporter): s+'gpu/perf.py', s+'gpu/profile.sh', s+'CMake/SubProcesses/CMakeLists.txt', s+'gpu/cudacpp_rex_driver.mk', - s+'REX/rwgt_instance.h', s+'REX/REX.hpp', s+'REX/teawREX.hpp'], + s+'REX/rwgt_instance.h', s+'REX/REX.h', s+'REX/teawREX.h'], 'test': [s+'gpu/cudacpp_test.mk']} # from_template['SubProcesses'].append(s+'REX/rwgt_instance.h') @@ -405,7 +408,7 @@ class RWGT_ProcessExporter(PLUGIN_ProcessExporter): 'MemoryBuffers.h', # this is generated from a template in Subprocesses but we still link it in P1 'MemoryAccessCouplings.h', # this is generated from a template in Subprocesses but we still link it in P1 'perf.py', 'profile.sh', - 'rwgt_instance.h', 'REX.hpp', 'teawREX.hpp'] + 'rwgt_instance.h', 'REX.h', 'teawREX.h'] # to_link_in_P.append('rwgt_instance.h') # to_link_in_P.append('REX.hpp') diff --git a/tools/REX/REX.cc b/tools/REX/REX.cc new file mode 120000 index 0000000000..65f267f382 --- /dev/null +++ b/tools/REX/REX.cc @@ -0,0 +1 @@ +REX.hpp \ No newline at end of file diff --git a/tools/REX/REX.h b/tools/REX/REX.h new file mode 100644 index 0000000000..2dead7d333 --- /dev/null +++ b/tools/REX/REX.h @@ -0,0 +1,830 @@ +/*** + * ______ _______ __ + * | ___ \ ___\ \ / / + * | |_/ / |__ \ V / + * | /| __| / \ + * | |\ \| |___/ /^\ \ + * \_| \_\____/\/ \/ + * + ***/ + +// THIS IS NOT A LICENSED RELEASE +// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD +// FROM AN IMPROPER RELEASE. + +// Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. +// All rights reserved. 
+ +#ifndef _REX_H_ +#define _REX_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// ZW: all fcns within the REX standard sit in the +// namespace REX +// Note that as a convention, std::string_view objects will be +// referred to as strings unless the difference is relevant +namespace REX +{ + #pragma warning( push ) + #pragma warning( disable : 4101) + static const size_t npos = -1; + #pragma warning( pop ) + + using sortFcn = std::function>(std::vector)>; + using statSort = std::function>(std::string_view, std::vector)>; + + template + std::shared_ptr> stoiSort(const std::vector &vector); + extern template std::shared_ptr> stoiSort(const std::vector &vector); + + template + std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort); + extern template std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort); + + std::shared_ptr> nuWordSplitter( std::string_view line ); + + struct xmlTree; + +// ZW: struct for handling tags in XML node opening tags + struct xmlTag { + public: + void setVal( std::string_view valSet ); + void setId( std::string_view idSet ); + std::string_view getVal(); + std::string_view getId(); + bool isModded(); + xmlTag(); + xmlTag( xmlTag& oldTag ); + xmlTag( std::string_view initId, std::string_view initVal); + protected: + bool modded; + std::string_view val; + std::string_view id; + }; + + struct xmlTree{ + public: + xmlTree(); + xmlTree( std::string_view file ); + xmlTree( std::string_view file, size_t& strt, size_t& nd ); + auto& getChildren(){ return children; } + std::string_view& getOrigin(){ return origin; } + size_t getStart(){ return start; } + size_t getEnd(){ return end; } + size_t getContStart(){ return contSt; } + size_t getContEnd(){ return contEnd; } + bool isFaux(){ return faux; } + bool isInit(){ return initialised; } + bool hasChildren(){ 
return children->size() > 0; } + protected: + std::shared_ptr>> children; // vector of pointers to children nodes + std::string_view origin; + size_t start; // position of opening bracket of node opening + size_t end; // position of final character of ending node, including trailing blankspace + size_t contSt; + size_t contEnd; + bool faux = false; // boolean showing whether this item is a true node or content squeezed between nodes + bool initialised; + }; + + struct xmlNode { + public: + xmlNode(); + xmlNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ); + xmlNode( xmlTree &tree ); + std::vector> getChildren(); + std::vector> getTags(); + std::string_view getFile(); + std::string_view getName(); + std::string_view getContent(); + size_t getStart(); + size_t getEnd(); + xmlTree getTree(); + virtual bool isModded(); + virtual bool isModded( bool deep ); + bool isWritten(); + bool isParsed(); + bool isFaux(); + bool hasChildren(); + void setModded( bool mod ); + bool deepModded(); + bool deepParse(); + void parser( bool recursive ); + void addChild( std::shared_ptr child ); + void addTag( std::shared_ptr tag ); + void setFile( std::string_view file ); + void setName( std::string_view newName ); + void setCont( std::string_view cont ); + protected: + virtual bool parse(); + virtual bool parse( bool recurs ); + bool parseTop(); + virtual bool parseContent(); + bool parseChildren( bool recursive ); + std::string nodeHeader; + std::string nodeContent; + std::string nodeEnd; + xmlTree structure; + std::vector> children; + std::vector> tags; + std::shared_ptr writtenSelf; + bool deepMod = false; + std::string_view xmlFile; + std::string_view name; + std::string_view content; + size_t start; + size_t end = npos; + bool modded = false; + bool written = false; + bool parsed = false; + bool deepParsed = false; + bool faux = false; + virtual void headWriter(); + virtual void endWriter(); + virtual void contWriter(); + virtual 
void childWriter(); + virtual void endFinder(); + virtual void fullWriter(); + public: + virtual int childCounter(); + virtual void childCounter( int& noChilds ); + virtual std::shared_ptr nodeWriter(); + }; + + struct lhePrt{ + public: + std::string_view getLine(); + std::string_view getComment(); + std::vector getMom(); + std::string_view getE(); + std::string_view getMass(); + std::string_view getVTim(); + std::string_view getSpin(); + std::string_view getPDG(); + std::string_view getStatus(); + std::vector getMothers(); + std::vector getColor(); + void setComment( std::string_view nuCom ); + void setMom( std::vector nuMom ); + void setEnergy( std::string_view nuE ); + void setMass( std::string_view nuM ); + void setVTim( std::string_view nuVTim ); + void setSpin( std::string_view nuSpin ); + void setPDG( std::string_view nuPDG ); + void setStatus( std::string_view nuSt ); + void setMothers( std::vector nuMum ); + void setColors( std::vector nuCol ); + bool isModded(); + bool isWritten(); + std::shared_ptr getContent(); + lhePrt(); + lhePrt( std::pair& prtInfo ); + lhePrt( std::pair& prtInfo ); + lhePrt( const std::string_view originFile, const size_t& beginLine = 0, const size_t& endLine = npos ); + protected: + std::shared_ptr content; + std::string_view sourceFile; + std::string_view comment; + std::string_view mom[3]; + std::string_view energy; + std::string_view mass; + std::string_view vtim; + std::string_view spin; + std::string_view pdg; + std::string_view status; + std::string_view mothers[2]; + std::string_view icol[2]; + bool modded = false; + bool written = false; + void writer(); + }; + + struct evHead { + public: + std::string_view getComment(); + std::string_view getWeight(); + std::string_view getScale(); + std::string_view getAQED(); + std::string_view getAQCD(); + std::string_view getNprt(); + std::string_view getProcID(); + bool isModded(); + bool isWritten(); + void setComment( std::string_view nuCom ); + void setWeight( std::string_view 
nuWgt ); + void setScale( std::string_view nuScale ); + void setAQED( std::string_view nuAQED ); + void setAQCD( std::string_view nuAQCD ); + void setNprt( std::string_view nuNprt ); + void setNprt( int nuNprt ); + void setProcID( std::string_view nuProcID ); + std::shared_ptr getContent(); + evHead(); + evHead( const std::string_view originFile, size_t beginLine = 0, size_t endLine = npos ); + protected: + std::shared_ptr content; + std::string_view sourceFile; + std::string_view comment; + std::string_view weight; + std::string_view scale; + std::string_view aqed; + std::string_view aqcd; + std::string_view nprt; + int nprtint; + std::string nprtstr; + std::string_view procid; + bool modded = false; + bool written = false; + void writer(); + }; + + struct bodyWgt : public xmlNode { + public: + void setComment( std::string_view nuComment ); + void setVal( std::string nuVal ); + void setVal( std::string_view nuVal ); + void setVal( double nuVal ); + void setId( std::string nuId ); + void setModded( bool nuModded ); + std::string_view getComment(); + std::string_view getValS(); + double getValD(); + bodyWgt(); + bodyWgt( std::string_view value ); + bodyWgt( double value ); + bodyWgt( std::string_view value, xmlTag rwgtId ); + bodyWgt( double value, xmlTag rwgtId ); + bodyWgt( std::string_view value, std::shared_ptr rwgtId ); + bodyWgt( double value, std::shared_ptr rwgtId ); + bodyWgt( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ); + bodyWgt( xmlNode& wgtNode ); + bodyWgt( xmlNode* wgtNode ); + bodyWgt( std::shared_ptr wgtNode ); + bodyWgt( xmlTree& wgtTree ); + bodyWgt( xmlTree* wgtTree ); + bodyWgt( std::shared_ptr wgtTree ); + bodyWgt( double value, std::string& idTag ); + void appendWgt( std::shared_ptr document ); + void appendWgt( std::string* document ); + std::shared_ptr appendWgt( std::string_view document ); + protected: + std::string_view comment; + std::string valS; + std::string id; + double valD; + void 
fullWriter() override; + }; + + struct event : public xmlNode { + public: + evHead getHead(); + std::vector> getPrts(); + std::vector> getWgts(); + void setHead( evHead head ); + void addPrt( std::shared_ptr prtcl ); + void addPrt( lhePrt prtcl ); + void setPrts( std::vector> prtcls ); + void addWgt( bodyWgt nuWgt ); + void addWgt( std::shared_ptr nuWgt ); + void addWgt( bodyWgt nuWgt, std::string& id ); + void addWgt( std::shared_ptr nuWgt, std::string& id ); + bool newWeight(); + int getNprt(); + bool isModded() override; + bool isModded( bool deep ) override ; + event(); + event( std::vector>& prtInfo ); + event( std::vector>& prtInfo ); + event( std::vector> prtInfo ); + event( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) ; + event( const xmlNode& originFile ); + event( const xmlNode* originFile ); + event( const std::shared_ptr& originFile ); + event( xmlTree& originFile ); + event( xmlTree* originFile ); + event( std::shared_ptr originFile ); + bool prtsAreMod(); + bool headIsMod(); + bool isSpecSort() const; + sortFcn getSortFcn() const; + statSort getStatSort() const; + protected: + std::vector> rwgt; + std::shared_ptr childRwgt; + bool hasRwgt(); + bool rwgtChild(); + bool bothRwgt(); + bool eitherRwgt(); + evHead header; + bool hasBeenProc = false; + std::vector> prts; + std::map> procMap; + std::map> procOrder; + sortFcn eventSort = []( std::vector vec ){ return stoiSort( vec ); }; + statSort specSort = []( std::string_view stat, std::vector vec ){ return stoiSort( vec ); }; + bool specSorted = false; + bool initProcMap(bool hard = false); + bool initProcMap( sortFcn sorter, bool hard = false ); + bool initProcMap( statSort sorter, bool hard = false ); + bool inRwgtChild( std::string_view name ); + bool checkRwgtOverlap(); + void childRwgtWriter(); + void vecRwgtWriter( bool midNode = false ); + void rwgtWriter(); + void contWriter() override; + void childWriter() override; + bool addedWgt = false; + void 
fullWriter() override; + void fullWriter( bool deep ); + void appendWgts(); + public: + std::shared_ptr nodeWriter() override; + std::shared_ptr nodeWriter( bool recursive ); + std::map> &getProc(); + std::map> &getProcOrder(); + std::map> getProc() const; + std::map> getProcOrder() const; + std::map> &getProc(sortFcn sorter); + std::map> &getProcOrder(sortFcn sorter); + std::map> &getProc(statSort sorter); + std::map> &getProcOrder(statSort sorter); + }; + + struct paramVal{ + public: + double value = 0; + int id = 0; + std::string_view realLine; + std::string_view comment; + std::string_view idStr; + std::string_view valStr; + virtual void parse(); + paramVal(); + paramVal( std::string_view paramLine, bool parseOnline = false ); + bool isMod(); + bool modded = false; + virtual std::shared_ptr selfWrite(); + }; + + struct paramBlock { + public: + std::string_view realBlock; + size_t startPt; + std::string_view comment; + std::string_view initComm; + std::string_view name; + std::vector params; + virtual void parse( bool parseOnline = false ); + paramBlock(); + paramBlock( std::string_view paramSet, bool parseOnline = false ); + bool isMod(); + bool modded = false; + virtual std::shared_ptr selfWrite(); + }; + + struct decVal : public paramVal{ + public: + void parse() override; + decVal( std::string_view paramLine = "", bool parseOnline = false ); + std::shared_ptr selfWrite() override; + }; + + struct decBlock : public paramBlock { + public: + std::vector decays; + void parse( bool parseOnline = false ) override; + void parse( std::shared_ptr> decLines, bool parseOnline = false ); + decBlock( std::string_view paramSet = "", bool parseOnline = false ); + std::shared_ptr selfWrite() override; + }; + + bool clStringComp( std::string_view str1, std::string str2 ); + + struct lesHouchesCard { + public: + decBlock decays; + std::string_view xmlFile; + size_t start; + size_t end; + bool modded; + bool parsed; + std::string_view header; + std::vector blocks; + size_t 
blockStart; + std::function lambda = [&]( size_t& conPt, const std::string_view& file ) + { return !( file[conPt+1] == ' ' || file[conPt+1] == '#' || file[conPt+1] == '\n' ); }; + std::function lambdaNu = [&]( size_t& conPt, const std::string_view& file ) + { return !( file[conPt+1] == ' ' || file[conPt+1] == '\n' || file[conPt+1] == '<'); }; + std::function lambdaD = [&]( size_t& conPt, const std::string_view& file ) + { return !( clStringComp(file.substr(conPt+1, 1), std::string("d") ) ); }; + void parse( bool parseOnline = false ); + lesHouchesCard( const std::string_view originFile = "", const size_t& begin = 0, bool parseOnline = false ); + bool isMod(); + std::shared_ptr selfWrite(); + }; + + + struct headWeight : public xmlNode { + public: + int getId(); + std::string_view getTag(); + bool hasTag(); + headWeight(); + headWeight( std::string_view paramSet, const size_t& begin = 0 ); + headWeight( std::string_view paramSet, std::string_view idText, int idNo, const size_t& begin = 0 ); + headWeight( xmlNode& node ); + headWeight( xmlNode* node ); + headWeight( std::shared_ptr node ); + headWeight( xmlTree& tree ); + headWeight( xmlTree* tree ); + headWeight( std::shared_ptr tree ); + headWeight( std::string_view paramSet, std::string& idText, unsigned int idNo, const size_t& begin = 0 ); + headWeight( std::string_view paramSet, std::string& idText); + void setId( std::string identity ); + protected: + std::string idTag; + long unsigned int id = npos; + void headWriter() override; + void headWriter( bool incId ); + void endWriter() override; + void contWriter() override; + void childWriter() override; + void childWriter( bool hasChildren ); + void fullWriter() override; + void fullWriter( bool incId, bool hasChildren=true ); + }; + + + // ZW: struct for handling rwgt groups + // in the LHE header initrwgt node + struct weightGroup : public xmlNode { + public: + bool getIncId(); + void setIncId( bool nuIncId ); + std::vector> getWgts(); + void addWgt( headWeight 
nuWgt ); + void addWgt( std::shared_ptr nuWgt ); + weightGroup(); + weightGroup( std::vector> nuWgts ); + weightGroup( std::vector nuWgts ); + weightGroup( xmlNode& wgtNode ); + weightGroup( xmlNode* wgtNode ); + weightGroup( xmlTree& wgtTree ); + weightGroup( xmlTree* wgtTree ); + weightGroup( std::shared_ptr wgtTree ); + weightGroup( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ); + protected: + std::string_view rwgtName; + std::string_view wgtNamStrat; + bool includeId = false; + std::vector> paramSets; + bool nu; + std::string_view idTag; + int id; + void headWriter() override; + void contWriter() override; + void childWriter() override; + void childWriter( bool hasChildren ); + void endWriter() override; + }; + + + struct initRwgt : public xmlNode { + public: + std::vector> getGroups(); + size_t noGrps(); + void addGroup( weightGroup nuGroup ); + void addGroup( std::shared_ptr nuGroup ); + void addWgt( unsigned int index, std::shared_ptr nuWgt ); + void addWgt( unsigned int index, headWeight nuWgt ); + initRwgt(); + initRwgt( std::vector> nuGroups ); + initRwgt( xmlNode& wgtNode ); + initRwgt( xmlNode* wgtNode ); + initRwgt( std::shared_ptr wgtNode ); + initRwgt( xmlTree& wgtTree ); + protected: + bool grpIsInit = false; + bool grpInit( std::shared_ptr& wgt ); + std::vector> groups; + void contWriter() override; + void childWriter() override; + void childWriter( bool hasChildren ); + }; + + struct lheInitHead{ + public: + std::string_view idbmup[2]; + std::string_view ebmup[2]; + std::string_view pdfgup[2]; + std::string_view pdfsup[2]; + std::string_view idwtup; + std::string_view nprup; + bool isWritten(); + bool isModded(); + std::shared_ptr getContent(); + lheInitHead( std::string_view initHead ); + lheInitHead( xmlNode& initNode ); + protected: + std::shared_ptr content; + bool written = false; + bool modded = false; + void writer(); + }; + + struct lheInitLine { + public: + std::string_view xsecup; + 
std::string_view xerrup; + std::string_view xmaxup; + std::string_view lprup; + bool isWritten(); + bool isModded(); + std::shared_ptr getContent(); + lheInitLine(); + lheInitLine( std::string_view procLine ); + protected: + std::shared_ptr content; + bool written = false; + bool modded = false; + void writer(); + }; + + + struct slhaNode : public xmlNode { + public: + std::shared_ptr getParameters(); + slhaNode(); + slhaNode( lesHouchesCard parameters ); + slhaNode( std::shared_ptr parameters ); + slhaNode( xmlNode& node, bool parseOnline = false ); + slhaNode( xmlNode* node, bool parseOnline = false ); + slhaNode( std::shared_ptr node, bool parseOnline = false ); + slhaNode( xmlTree tree, bool parseOnline = false ); + slhaNode( std::shared_ptr tree, bool parseOnline = false ); + slhaNode( xmlTree* tree, bool parseOnline = false ); + slhaNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ); + protected: + std::shared_ptr parameterCard; + bool pCardInit = false; + void headWriter() override; + void endWriter() override; + void contWriter() override; + }; + + struct initNode : public xmlNode { + public: + std::shared_ptr getHead(); + std::vector> getLines(); + void setHead( std::shared_ptr head ); + void setLines( std::vector> lines ); + void addLine( std::shared_ptr line ); + initNode(); + initNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ); + initNode( xmlNode& node, bool parseOnline = false ); + initNode( xmlNode* node, bool parseOnline = false ); + initNode( std::shared_ptr node, bool parseOnline = false ); + initNode( xmlTree tree, bool parseOnline = false ); + initNode( std::shared_ptr tree, bool parseOnline = false ); + initNode( xmlTree* tree, bool parseOnline = false ); + protected: + std::shared_ptr initHead; + std::vector> initLines; + bool parseContent() override; + void contWriter() override; + }; + + struct lheHead : public xmlNode { + public: + size_t addWgtGroup( 
std::shared_ptr& wgtGroup ); + size_t addWgtGroup( weightGroup wgtGroup ); + void addWgt( size_t index, std::shared_ptr nuWgt ); + void addWgt( size_t index, headWeight nuWgt ); + void addWgt( size_t index, std::shared_ptr nuWgt, std::string idTagg ); + void addWgt( size_t index, headWeight nuWgt, std::string idTagg ); + void setInitRwgt( initRwgt initWgt ); + void setInitRwgt( std::shared_ptr initWgt ); + std::vector> getWgtGroups(); + std::shared_ptr getInitRwgt(); + std::shared_ptr getParameters(); + void setParameters( std::shared_ptr params ); + bool rwgtInc(); + lheHead(); + lheHead( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ); + lheHead( xmlNode& node ); + lheHead( xmlNode* node ); + lheHead( std::shared_ptr node ); + lheHead( xmlTree tree ); + lheHead( std::shared_ptr tree ); + lheHead( xmlTree* tree ); + protected: + bool wgtGrpIsInit = false; + bool wgtGrpInit( std::shared_ptr& wgtGrp ); + std::shared_ptr parameters; + bool hasRwgt = false; + std::shared_ptr rwgtNodes; + std::vector> initrwgt; + bool relChildSet = false; + std::vector relChild; + void setRelChild(); + bool parseChildren( bool recursive ); + void headWriter() override; + void childWriter() override; + void fullWriter() override; + }; + + struct newWgt{ + protected: + std::shared_ptr headWgt; + std::vector> bodyWgts; + public: + newWgt( std::shared_ptr heaWgt, std::vector> bodWgts ); + newWgt( std::shared_ptr heaWgt, std::shared_ptr> wgts ); + newWgt( std::string_view parameters, std::shared_ptr> wgts, std::string idTag = "rex_rwgt" ); + newWgt( std::string_view parameters, int idNum, std::shared_ptr> wgts, std::string idTag = "rex_rwgt" ); + newWgt( std::string& parameters ); + newWgt( std::string& parameters, std::string& idTag ); + std::shared_ptr getHeadWgt(); + std::vector> getBodyWgts(); + void addBdyWgts( std::shared_ptr> wgts ); + }; + + + struct lheNode : public xmlNode { + public: + lheNode(); + lheNode( const std::string_view 
originFile, const size_t& begin = 0, const std::vector>& childs = {} ); + std::shared_ptr getHeader(); + std::shared_ptr getInit(); + std::vector> getEvents(); + bool isModded() override; + bool isModded( bool deep ) override; + void setInit( std::shared_ptr initNod ); + void setHeader( std::shared_ptr headNod ); + void addWgt( size_t index, newWgt& addedWgt ); + void addWgt( size_t index, newWgt& addedWgt, std::string& idTag ); + void setRelStats( std::vector& particles ); + std::vector& getRelStats(); + void setSameSort( sortFcn& sortF ); + sortFcn& getSameSort(); + void setStatSort( statSort& statS ); + statSort& getStatSort(); + protected: + std::vector> events = {}; + std::shared_ptr header = std::make_shared(xmlFile, start); + std::shared_ptr init = std::make_shared(xmlFile, start); + std::vector relStat = {"-1", "1"}; + sortFcn particleSort = []( std::vector prts ){ return stoiSort(prts); }; + statSort statParticleSort = []( std::string_view dummy, std::vector prts ){ return stoiSort(prts); }; + virtual void headerWriter(); + virtual void initWriter(); + virtual void eventWriter(); + void contWriter() override; + void fullWriter() override; + public: + virtual std::shared_ptr nodeWriter(); + }; + + struct evtInfo { + public: + std::vector wgts; + std::vector scales; + std::vector aQEDs; + std::vector aQCDs; + std::vector nprts; + std::vector relNPrts; + std::vector procIDs; + evtInfo( const std::vector>& lheFile = {} ); + evtInfo( const std::vector>& lheFile, const std::vector& statVec ); + evtInfo( const std::vector>& lheFile, const std::vector& statVec, + sortFcn sorter ); + evtInfo( const std::vector>& lheFile, const std::vector& statVec, + statSort sorter ); + }; + + struct prtInfo { + public: + std::vector moms; + std::vector masses; + std::vector vtims; + std::vector spins; + std::vector statuses; + std::vector mothers; + std::vector icols; + std::vector pdgs; + prtInfo( const std::vector>& lheFile = {}, const int nPrt = 8 ); + prtInfo( const 
std::vector>& lheFile, const int nPrt, const std::vector& statVec ); + prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + sortFcn sorter ); + prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + statSort sorter ); + }; + + struct transMonoLHE { + public: + evtInfo evtsHead; + prtInfo evtsData; + std::shared_ptr process; + transMonoLHE( const std::vector>& lheFile = {}, const int nPrt = 8 ); + transMonoLHE( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ); + transMonoLHE( const std::vector>& lheFile, const int nPrt, + sortFcn sorter, + std::vector statVec = { "-1", "1" } ); + transMonoLHE( const std::vector>& lheFile, const int nPrt, + statSort sorter, + std::vector statVec = { "-1", "1" } ); + }; + + struct transLHE { + public: + std::string_view xmlFile; + std::vector> subProcs; + std::vector> procSets; + std::vector>> relProcs; + transLHE(); + transLHE( lheNode& lheFile ); + transLHE( lheNode& lheFile, + sortFcn sorter, + const std::vector& statVec = { "-1", "1" } ); + transLHE( lheNode& lheFile, + statSort sorter, + const std::vector& statVec = { "-1", "1" } ); + transLHE( lheNode& lheFile, const std::vector& statVec ); + std::shared_ptr> vectorFlat( std::vector>> vecVec ); + }; + + struct lheRetDs{ + public: + bool ebmup = false; + bool xsecup = false; + bool xerrup = false; + bool xmaxup = false; + bool xwgtup = false; + bool scalup = false; + bool aqedup = false; + bool aqcdup = false; + bool pup = true; + bool mass = false; + bool vtimup = false; + bool spinup = false; + std::vector getBools(); + }; + + // ZW: bool struct to define which int values + // to extract transposed from LHE file + struct lheRetInts{ + public: + //bool maxpup = false; + bool idbmup = false; + bool pdfgup = false; + bool pdfsup = false; + bool idwtup = false; + bool nprup = false; + bool lprup = false; + //bool maxnup = false; + bool nup = true; + bool idprup = false; + bool idup = true; + bool 
istup = true; + bool mothup = false; + bool icolup = false; + std::vector getBools(); + }; + + struct eventComp{ + bool operator()( event& firstEv, event& secEv); + bool operator()( const event& firstEv, const event& secEv) const; + bool operator()(event& firstEv, event& secEv, std::vector statVec); + }; + + +std::shared_ptr>>> lheValDoubles( lheNode& lheFile, lheRetDs vals = lheRetDs() ); + +std::shared_ptr>>> lheValDoubles(transLHE& lheAOS, lheRetDs vals = lheRetDs() ); + +// struct lhePrt; +// struct xmlNode; +// struct event : public xmlNode; +// event& makeEv( std::vector>& particles ); +// std::vector> getParticles( event& ev ); +// struct eventComp; +} + +#endif diff --git a/tools/REX/REX.hpp b/tools/REX/REX.hpp index bbf7596c2f..706c65cca4 100644 --- a/tools/REX/REX.hpp +++ b/tools/REX/REX.hpp @@ -15,8 +15,8 @@ // Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. // All rights reserved. -#ifndef _REX_HPP_ -#define _REX_HPP_ +#ifndef _REX_CC_ +#define _REX_CC_ #include #include @@ -35,6 +35,8 @@ #include #include #include +#include "REX.h" +#include // ZW: all fcns within the REX standard sit in the // namespace REX @@ -42,10 +44,6 @@ // referred to as strings unless the difference is relevant namespace REX { - #pragma warning( push ) - #pragma warning( disable : 4101) - static const size_t npos = -1; - #pragma warning( pop ) using sortFcn = std::function>(std::vector)>; using statSort = std::function>(std::string_view, std::vector)>; @@ -67,9 +65,11 @@ namespace REX template std::shared_ptr> stoiSort(const std::vector &vector) { - std::function stoicomp = [](const T& i, const T& j) { return std::stoi(std::string(i)) < std::stoi(std::string(j)); }; + std::function stoicomp = [](const T& i, const T& j) { + return std::stoi(std::string(i)) < std::stoi(std::string(j)); }; return indSort(vector, stoicomp); } + template std::shared_ptr> stoiSort(const std::vector &vector); // ZW: wrapper for indSort for comparing string-type arguments representing doubles 
template @@ -92,22 +92,24 @@ namespace REX indexMap[reference[i]].push(i); } - std::shared_ptr> order; - order->reserve(to_sort.size()); // Pre-allocate memory - + auto order = std::make_shared>(std::vector(to_sort.size(), npos)); + //order->reserve(to_sort.size()); // Pre-allocate memory + size_t pos = 0; for (const auto& elem : to_sort) { auto it = indexMap.find(elem); if (it != indexMap.end() && !it->second.empty()) { - order->push_back(it->second.front()); + order->at(pos) = (it->second.front()); it->second.pop(); - } else { + } //else { // Element in vec2 not found in vec1 - order->push_back(npos); - } + // order->at(pos) = npos; + //} + ++pos; } return order; } + template std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort); // ZW: minimal fcn for counting the amount of times // a given search term appears in a string @@ -232,16 +234,32 @@ namespace REX // ZW: templated fcn for comparing two // string-like objects, ignoring cases - template - bool clStringComp( const Str1& org, const Str2& comp ){ + bool clStringComp( std::string_view org, std::string comp ){ return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); } - template - bool clStringComp( const Str1Pt& orgStrt, const Str1Pt& orgEnd, const Str2& comp ){ - return std::equal( orgStrt, orgEnd, comp.begin(), comp.end(), + bool clStringComp( std::string_view org, std::string_view comp ){ + return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); } + bool clStringComp( std::string org, std::string_view comp ){ + return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), + []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + } + bool clStringComp( std::string org, std::string comp ){ + return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), + 
[]( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + } + // template + // bool clStringComp( const Str1& org, const Str2& comp ){ + // return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), + // []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + // } + // template + // bool clStringComp( const Str1Pt& orgStrt, const Str1Pt& orgEnd, const Str2& comp ){ + // return std::equal( orgStrt, orgEnd, comp.begin(), comp.end(), + // []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + // } // ZW: templated fcn for finding a caseless substring searchTerm in srcFile // On failure to find searchTerm, returns REX::npos @@ -347,25 +365,18 @@ namespace REX } // ZW: struct for handling tags in XML node opening tags - struct xmlTag { - public: - void setVal( std::string_view valSet ){ modded = true; val = valSet; } - void setId( std::string_view idSet ){ modded = true; id = idSet; } - std::string_view getVal(){ return val; } - std::string_view getId(){ return id; } - bool isModded(){ return modded; } - xmlTag(){ modded = false; return; } - xmlTag( xmlTag& oldTag ){ + void xmlTag::setVal( std::string_view valSet ){ modded = true; val = valSet; } + void xmlTag::setId( std::string_view idSet ){ modded = true; id = idSet; } + std::string_view xmlTag::getVal(){ return val; } + std::string_view xmlTag::getId(){ return id; } + bool xmlTag::isModded(){ return modded; } + xmlTag::xmlTag(){ modded = false; return; } + xmlTag::xmlTag( xmlTag& oldTag ){ modded = false; val = oldTag.getVal(); id = oldTag.getId(); } - xmlTag( std::string_view initId, std::string_view initVal){ + xmlTag::xmlTag( std::string_view initId, std::string_view initVal){ modded = false; val = initVal; id = initId; } - protected: - bool modded; - std::string_view val; - std::string_view id; - }; // ZW: function for parsing XML opening // tags and returning the next header tag @@ -385,10 +396,8 @@ namespace REX // end of each 
node s.t. the proper node structures can accurately // detail where children begin and end while allowing for personal // content between child nodes - struct xmlTree { - public: - xmlTree(){ return; } - xmlTree( std::string_view file ){ + xmlTree::xmlTree(){ return; } + xmlTree::xmlTree( std::string_view file ){ origin = file; children = std::make_shared>>(); start = file.find_first_not_of(" \n\r\f\t\v"); @@ -428,7 +437,7 @@ namespace REX end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); initialised = true; } - xmlTree( std::string_view file, size_t& strt, size_t& nd ){ + xmlTree::xmlTree( std::string_view file, size_t& strt, size_t& nd ){ origin = file; children = std::make_shared>>(); start = file.find_first_not_of(" \n\r\f\t\v", strt); @@ -475,31 +484,10 @@ namespace REX strt = end; nd = nodeEndFind(file, strt); } - auto& getChildren(){ return children; } - std::string_view& getOrigin(){ return origin; } - size_t getStart(){ return start; } - size_t getEnd(){ return end; } - size_t getContStart(){ return contSt; } - size_t getContEnd(){ return contEnd; } - bool isFaux(){ return faux; } - bool isInit(){ return initialised; } - bool hasChildren(){ return children->size() > 0; } - protected: - std::shared_ptr>> children; // vector of pointers to children nodes - std::string_view origin; - size_t start; // position of opening bracket of node opening - size_t end; // position of final character of ending node, including trailing blankspace - size_t contSt; - size_t contEnd; - bool faux = false; // boolean showing whether this item is a true node or content squeezed between nodes - bool initialised; - }; // ZW: struct for handling nodes in generic XML files - struct xmlNode { - public: - xmlNode(){ modded = false; return; } - xmlNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ){ + xmlNode::xmlNode(){ modded = false; return; } + xmlNode::xmlNode( const std::string_view originFile, const size_t& begin, const 
std::vector>& childs ){ modded = false; xmlFile = originFile; structure = xmlTree( originFile ); @@ -513,7 +501,7 @@ namespace REX children.push_back( std::make_shared( *child ) ); } } - xmlNode( xmlTree &tree ){ + xmlNode::xmlNode( xmlTree &tree ){ modded = false; structure = tree; if( !structure.isInit() ){ return; } @@ -528,43 +516,43 @@ namespace REX children.push_back( std::make_shared( *child ) ); } } - std::vector> getChildren(){ return children; } - std::vector> getTags(){ return tags; } - std::string_view getFile(){ return xmlFile; } - std::string_view getName(){ return name; } - std::string_view getContent(){ return content; } - size_t getStart(){ return start; } - size_t getEnd(){ return end; } - xmlTree getTree(){ return structure; } - virtual bool isModded(){ return modded; } - virtual bool isModded( bool deep ){ + std::vector> xmlNode::getChildren(){ return children; } + std::vector> xmlNode::getTags(){ return tags; } + std::string_view xmlNode::getFile(){ return xmlFile; } + std::string_view xmlNode::getName(){ return name; } + std::string_view xmlNode::getContent(){ return content; } + size_t xmlNode::getStart(){ return start; } + size_t xmlNode::getEnd(){ return end; } + xmlTree xmlNode::getTree(){ return structure; } + bool xmlNode::isModded(){ return modded; } + bool xmlNode::isModded( bool deep ){ bool modStat = isModded(); if( !deep ){ return modStat; } for( auto child : children ){ modStat = (modStat || child->isModded( deep )); } return modStat; } - bool isWritten(){ return written; } - bool isParsed(){ return parsed; } - bool isFaux(){ return faux; } - bool hasChildren(){ return children.size() > 0; } - void setModded( bool mod ){ modded = mod; } - bool deepModded(){ return deepMod; } - bool deepParse(){ return deepParsed; } - void parser( bool recursive ){ + bool xmlNode::isWritten(){ return written; } + bool xmlNode::isParsed(){ return parsed; } + bool xmlNode::isFaux(){ return faux; } + bool xmlNode::hasChildren(){ return children.size() 
> 0; } + void xmlNode::setModded( bool mod ){ modded = mod; } + bool xmlNode::deepModded(){ return deepMod; } + bool xmlNode::deepParse(){ return deepParsed; } + void xmlNode::parser( bool recursive ){ parsed = parse( recursive ); } - void addChild( std::shared_ptr child ){ modded = true; children.push_back(child); } - void addTag( std::shared_ptr tag ){ modded = true; tags.push_back(tag); } - void setFile( std::string_view file ){ modded = true; xmlFile = file; } - void setName( std::string_view newName ){ modded = true; name = newName; } - void setCont( std::string_view cont ){ modded = true; content = cont; } - protected: - virtual bool parse(){ + void xmlNode::addChild( std::shared_ptr child ){ modded = true; children.push_back(child); } + void xmlNode::addTag( std::shared_ptr tag ){ modded = true; tags.push_back(tag); } + void xmlNode::setFile( std::string_view file ){ modded = true; xmlFile = file; } + void xmlNode::setName( std::string_view newName ){ modded = true; name = newName; } + void xmlNode::setCont( std::string_view cont ){ modded = true; content = cont; } + + bool xmlNode::parse(){ auto topStat = parseTop(); auto contStat = parseContent(); return ( topStat && contStat ); } - virtual bool parse( bool recurs ) + bool xmlNode::parse( bool recurs ) { bool parseSt = parse(); if( !recurs ){ return parseSt; } @@ -572,14 +560,14 @@ namespace REX deepMod = true; return (parseSt && childSt ); } - bool parseTop(){ + bool xmlNode::parseTop(){ if( xmlFile == "" ){ return false; } if( isFaux() ){ return true; } size_t eqSgn = xmlFile.find( "=", start ); size_t nodeInitEnd = xmlFile.find( ">", start ); while( eqSgn < nodeInitEnd ){ tags.push_back( xmlTagParser( xmlFile, eqSgn ) ); } return true; } - virtual bool parseContent(){ + bool xmlNode::parseContent(){ if( xmlFile == "" ){ return false; } end = structure.getContEnd(); for( auto branch : *(structure.getChildren()) ){ @@ -587,7 +575,7 @@ namespace REX } return true; } - bool parseChildren( bool recursive ){ 
+ bool xmlNode::parseChildren( bool recursive ){ bool status = true; if( recursive ){ for( auto child : children ) @@ -604,25 +592,7 @@ namespace REX } return status; } - std::string nodeHeader; - std::string nodeContent; - std::string nodeEnd; - xmlTree structure; - std::vector> children; - std::vector> tags; - std::shared_ptr writtenSelf; - bool deepMod = false; - std::string_view xmlFile; - std::string_view name; - std::string_view content; - size_t start; - size_t end = npos; - bool modded = false; - bool written = false; - bool parsed = false; - bool deepParsed = false; - bool faux = false; - virtual void headWriter() { + void xmlNode::headWriter() { if( isFaux() ){ return; } nodeHeader = "<" + std::string(name) ; for( auto tag : tags ){ @@ -630,24 +600,24 @@ namespace REX } nodeHeader += ">"; } - virtual void endWriter() { + void xmlNode::endWriter() { if( isFaux() ){ return; } auto endSt = xmlFile.find_last_of("<", end); nodeEnd = xmlFile.substr( endSt, end - endSt ); } - virtual void contWriter() { + void xmlNode::contWriter() { if( hasChildren() ){ nodeContent = std::string(content.substr(0, children[0]->start - 1 )); } else { nodeContent = std::string(content); } } - virtual void childWriter() { + void xmlNode::childWriter() { for(auto child : children){ nodeContent += (*child->nodeWriter()); } } - virtual void endFinder(){ + void xmlNode::endFinder(){ auto headEnd = xmlFile.find(">", start); auto slashPos = xmlFile.find("/", start); if( headEnd > slashPos ){ end = headEnd; } @@ -655,7 +625,7 @@ namespace REX if( end == npos ){ end = xmlFile.size(); return; } end += 2; } - virtual void fullWriter(){ + void xmlNode::fullWriter(){ if( isModded() ){ headWriter(); contWriter(); @@ -669,8 +639,8 @@ namespace REX written = true; } } - public: - virtual void childCounter( int& noChilds ) + + void xmlNode::childCounter( int& noChilds ) { for( auto child : children ) { @@ -679,16 +649,16 @@ namespace REX } noChilds += children.size(); } - virtual int 
childCounter() { + int xmlNode::childCounter() { int noChilds = 0; childCounter( noChilds ); return noChilds; } - virtual std::shared_ptr nodeWriter() { + std::shared_ptr xmlNode::nodeWriter() { if( isModded( true ) || !isWritten() ){ fullWriter(); } return writtenSelf; } - }; + // ZW: function for large scale parsing of XML files // sequentially goes through the document and @@ -715,17 +685,15 @@ namespace REX // ZW: struct for handling rwgt parameter sets // in the LHE header initrwgt node - struct headWeight : public xmlNode { - public: - int getId(){ return id; } - std::string_view getTag(){ return idTag; } - bool hasTag(){ return (idTag.size() > 0); } - headWeight(){ name = "weight"; return; } - headWeight( std::string_view paramSet, const size_t& begin = 0 ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; return; } - headWeight( std::string_view paramSet, std::string_view idText, int idNo, const size_t& begin = 0 ) : xmlNode(){ + int headWeight::headWeight::getId(){ return id; } + std::string_view headWeight::getTag(){ return idTag; } + bool headWeight::hasTag(){ return (idTag.size() > 0); } + headWeight::headWeight(){ name = "weight"; return; } + headWeight::headWeight( std::string_view paramSet, const size_t& begin ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; return; } + headWeight::headWeight( std::string_view paramSet, std::string_view idText, int idNo, const size_t& begin ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; } - headWeight( xmlNode& node ) : xmlNode( node ){ + headWeight::headWeight( xmlNode& node ) : xmlNode( node ){ parser( false ); name = "weight"; for (auto tag : tags ){ @@ -735,7 +703,7 @@ namespace REX } } } - headWeight( xmlNode* node ) : xmlNode( *node ){ + headWeight::headWeight( xmlNode* node ) : xmlNode( *node ){ parser( false ); name = "weight"; for (auto tag : tags ){ @@ -745,7 +713,7 @@ namespace REX } } } - headWeight( 
std::shared_ptr node ) : xmlNode( *node ){ + headWeight::headWeight( std::shared_ptr node ) : xmlNode( *node ){ parser( false ); name = "weight"; for (auto tag : tags ){ @@ -755,7 +723,7 @@ namespace REX } } } - headWeight( xmlTree& tree ) : xmlNode( tree ){ + headWeight::headWeight( xmlTree& tree ) : xmlNode( tree ){ parser( false ); name = "weight"; for (auto tag : tags ){ @@ -765,7 +733,7 @@ namespace REX } } } - headWeight( xmlTree* tree ) : xmlNode( *tree ){ + headWeight::headWeight( xmlTree* tree ) : xmlNode( *tree ){ parser( false ); name = "weight"; for (auto tag : tags ){ @@ -775,7 +743,7 @@ namespace REX } } } - headWeight( std::shared_ptr tree ) : xmlNode( *tree ){ + headWeight::headWeight( std::shared_ptr tree ) : xmlNode( *tree ){ parser( false ); name = "weight"; for (auto tag : tags ){ @@ -785,17 +753,14 @@ namespace REX } } } - headWeight( std::string_view paramSet, std::string& idText, unsigned int idNo, const size_t& begin = 0 ) : xmlNode(){ + headWeight::headWeight( std::string_view paramSet, std::string& idText, unsigned int idNo, const size_t& begin ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; } - headWeight( std::string_view paramSet, std::string& idText){ + headWeight::headWeight( std::string_view paramSet, std::string& idText){ name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; } - void setId( std::string identity ){ modded = true; idTag = identity; } - protected: - std::string idTag; - long unsigned int id = npos; - void headWriter() override{ + void headWeight::setId( std::string identity ){ modded = true; idTag = identity; } + void headWeight::headWriter(){ if( tags.size() == 0 ){ if( idTag == "" ){ nodeHeader = ""; return; } if( id == npos ){ nodeHeader = ""; return; } @@ -808,7 +773,7 @@ namespace REX } nodeHeader += ">"; } - void headWriter( bool incId ){ + void headWeight::headWriter( bool incId ){ if( !incId ){ headWriter(); return; } if( idTag == "" ){ 
headWriter(); return; } if( id == npos ){ nodeHeader = "getName() == "weight" ){ continue; } nodeContent += *(child->nodeWriter()); } } - void childWriter( bool hasChildren ){ + void headWeight::childWriter( bool hasChildren ){ if( hasChildren ){ childWriter(); } } - void fullWriter() override{ + void headWeight::fullWriter(){ if( isModded() || !isWritten() ){ headWriter(); contWriter(); @@ -845,7 +810,7 @@ namespace REX modded = false; } } - void fullWriter( bool incId, bool hasChildren=true ){ + void headWeight::fullWriter( bool incId, bool hasChildren ){ if( isModded() || !isWritten() ){ headWriter( incId ); contWriter(); @@ -856,27 +821,24 @@ namespace REX written = true; } } - }; // ZW: struct for handling rwgt groups // in the LHE header initrwgt node - struct weightGroup : public xmlNode { - public: - bool getIncId(){ return includeId; } - void setIncId( bool nuIncId ){ includeId = nuIncId; } - std::vector> getWgts(){ return paramSets; } - void addWgt( headWeight nuWgt ){ modded = true; paramSets.push_back( std::make_shared( nuWgt ) ); if( nuWgt.hasTag() ){ includeId = true; } } - void addWgt( std::shared_ptr nuWgt ){ modded = true; paramSets.push_back( nuWgt); if( nuWgt->hasTag() ){ includeId = true; }} - weightGroup() : xmlNode(){ name = "weightgroup"; return; } - weightGroup( std::vector> nuWgts ) : xmlNode(){ name = "weightgroup"; paramSets = nuWgts; for( auto wgt : nuWgts ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup( std::vector nuWgts ) : xmlNode(){ + bool weightGroup::getIncId(){ return includeId; } + void weightGroup::setIncId( bool nuIncId ){ includeId = nuIncId; } + std::vector> weightGroup::getWgts(){ return paramSets; } + void weightGroup::addWgt( headWeight nuWgt ){ modded = true; paramSets.push_back( std::make_shared( nuWgt ) ); if( nuWgt.hasTag() ){ includeId = true; } } + void weightGroup::addWgt( std::shared_ptr nuWgt ){ modded = true; paramSets.push_back( nuWgt); if( nuWgt->hasTag() ){ includeId = true; }} + 
weightGroup::weightGroup() : xmlNode(){ name = "weightgroup"; return; } + weightGroup::weightGroup( std::vector> nuWgts ) : xmlNode(){ name = "weightgroup"; paramSets = nuWgts; for( auto wgt : nuWgts ){ if( wgt->hasTag() ){ includeId = true; } } } + weightGroup::weightGroup( std::vector nuWgts ) : xmlNode(){ name = "weightgroup"; for( auto wgt : nuWgts ){ paramSets.push_back( std::make_shared( wgt ) ); } for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup( xmlNode& wgtNode ) : xmlNode( wgtNode ){ + weightGroup::weightGroup( xmlNode& wgtNode ) : xmlNode( wgtNode ){ parser( true ); name = "weightgroup"; paramSets.reserve( children.size() ); @@ -885,7 +847,7 @@ namespace REX } for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + weightGroup::weightGroup( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ parser( true ); name = "weightgroup"; paramSets.reserve( children.size() ); @@ -894,7 +856,7 @@ namespace REX } for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + weightGroup::weightGroup( xmlTree& wgtTree ) : xmlNode( wgtTree ){ parser( true ); name = "weightgroup"; paramSets.reserve( children.size() ); @@ -903,7 +865,7 @@ namespace REX } for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ + weightGroup::weightGroup( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ parser( true ); name = "weightgroup"; paramSets.reserve( children.size() ); @@ -912,7 +874,7 @@ namespace REX } for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ + weightGroup::weightGroup( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ parser( true ); name = "weightgroup"; paramSets.reserve( children.size() ); @@ -921,7 +883,7 @@ namespace REX } for( auto wgt : 
paramSets ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + weightGroup::weightGroup( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) : xmlNode( originFile, begin, childs ){ name = "weightgroup"; if( parseTop() ){ @@ -934,69 +896,58 @@ namespace REX } } } - protected: - std::string_view rwgtName; - std::string_view wgtNamStrat; - bool includeId = false; - std::vector> paramSets; - bool nu; - std::string_view idTag; - int id; - void headWriter() override{ + void weightGroup::headWriter() { nodeHeader = "nodeWriter()); } } - void childWriter() override{ + void weightGroup::childWriter() { for(auto child : children){ if( child->getName() == "weight" ){ continue; } nodeContent += (*child->nodeWriter()); } } - void childWriter( bool hasChildren ){ + void weightGroup::childWriter( bool hasChildren ){ if( hasChildren ){ childWriter(); } return; } - void endWriter() override{ nodeEnd = "\n"; } - }; + void weightGroup::endWriter() { nodeEnd = "\n"; } - struct initRwgt : public xmlNode { - public: - std::vector> getGroups(){ return groups; } - size_t noGrps(){ return groups.size(); } - void addGroup( weightGroup nuGroup ){ + std::vector> initRwgt::getGroups(){ return groups; } + size_t initRwgt::noGrps(){ return groups.size(); } + void initRwgt::addGroup( weightGroup nuGroup ){ modded = true; auto nuGrpPtr = std::make_shared( nuGroup ); if( grpInit( nuGrpPtr ) ){ groups.push_back( std::make_shared( nuGroup ) ); } } - void addGroup( std::shared_ptr nuGroup ){ + void initRwgt::addGroup( std::shared_ptr nuGroup ){ modded = true; if( grpInit( nuGroup ) ){ groups.push_back( nuGroup ); } } - void addWgt( unsigned int index, std::shared_ptr nuWgt ){ + void initRwgt::addWgt( unsigned int index, std::shared_ptr nuWgt ){ if( index < groups.size() ){ modded = true; groups[index]->addWgt( nuWgt ); } else throw std::range_error( "Appending 
weight to uninitialised weightgroup." ); } - void addWgt( unsigned int index, headWeight nuWgt ){ + void initRwgt::addWgt( unsigned int index, headWeight nuWgt ){ if( index < groups.size() ){ modded = true; groups[index]->addWgt( nuWgt ); } else throw std::range_error( "Appending weight to uninitialised weightgroup." ); } - initRwgt() : xmlNode(){ name = "initrwgt"; return; } - initRwgt( std::vector> nuGroups ) : xmlNode(){ + initRwgt::initRwgt() : xmlNode(){ name = "initrwgt"; return; } + initRwgt::initRwgt( std::vector> nuGroups ) : xmlNode(){ name = "initrwgt"; for( auto group : nuGroups ){ groups.push_back( std::make_shared( *group ) ); } } - initRwgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ + initRwgt::initRwgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ parser( true ); name = "initrwgt"; groups.reserve( children.size() ); @@ -1004,7 +955,7 @@ namespace REX groups.push_back( std::make_shared( *child ) ); } } - initRwgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + initRwgt::initRwgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ parser( true ); name = "initrwgt"; groups.reserve( children.size() ); @@ -1012,7 +963,7 @@ namespace REX groups.push_back( std::make_shared( *child ) ); } } - initRwgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ + initRwgt::initRwgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ parser( true ); name = "initrwgt"; groups.reserve( children.size() ); @@ -1020,7 +971,7 @@ namespace REX groups.push_back( std::make_shared( *child ) ); } } - initRwgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + initRwgt::initRwgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ parser( true ); name = "initrwgt"; groups.reserve( children.size() ); @@ -1028,9 +979,7 @@ namespace REX groups.push_back( std::make_shared( *child ) ); } } - protected: - bool grpIsInit = false; - bool grpInit( std::shared_ptr& wgt ){ + bool initRwgt::grpInit( std::shared_ptr& wgt ){ if( grpIsInit ){ return true; } else{ groups = std::vector>( 1, wgt ); @@ -1038,112 +987,103 @@ namespace 
REX return false; } } - std::vector> groups; - void contWriter() override{ + void initRwgt::contWriter(){ nodeContent = "\n"; for( auto group : groups ){ nodeContent += (*group->nodeWriter()); } } - void childWriter() override{ + void initRwgt::childWriter(){ for( auto child : children ){ if( child->getName() == "weightgroup" ){ continue; } nodeContent += (*child->nodeWriter()); } } - void childWriter( bool hasChildren ){ + void initRwgt::childWriter( bool hasChildren ){ if( hasChildren ){ childWriter(); } return; } - }; // ZW: struct for handling weights // in event blocks of LHE files - struct bodyWgt : public xmlNode { - public: - void setComment( std::string_view nuComment ){ modded = true; comment = nuComment; } - void setVal( std::string nuVal ){ modded = true; valS = nuVal; valD = std::stod(valS);} - void setVal( std::string_view nuVal ){ modded = true; valS = std::string(nuVal); valD = std::stod(valS);} - void setVal( double nuVal ){ modded = true; valD = nuVal; valS = std::to_string(valD);} - void setId( std::string nuId ){ + void bodyWgt::setComment( std::string_view nuComment ){ modded = true; comment = nuComment; } + void bodyWgt::setVal( std::string nuVal ){ modded = true; valS = nuVal; valD = std::stod(valS);} + void bodyWgt::setVal( std::string_view nuVal ){ modded = true; valS = std::string(nuVal); valD = std::stod(valS);} + void bodyWgt::setVal( double nuVal ){ modded = true; valD = nuVal; valS = std::to_string(valD);} + void bodyWgt::setId( std::string nuId ){ modded = true; id = nuId; for( auto tag : tags ){ if( tag->getId() == "id" ){ tag->setVal( id ); return; } } addTag( std::make_shared( "id", id ) ); } - void setModded( bool nuModded ){ modded = nuModded; } - std::string_view getComment(){ return comment; } - std::string_view getValS(){ return valS; } - double getValD(){ return valD; } - bodyWgt() : xmlNode(){ return; } - bodyWgt( std::string_view value ) : xmlNode() { setVal( value ); modded = false; } - bodyWgt( double value ) : xmlNode() 
{ setVal( value ); modded = false; } - bodyWgt( std::string_view value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } - bodyWgt( double value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } - bodyWgt( std::string_view value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } - bodyWgt( double value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } - bodyWgt( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + void bodyWgt::setModded( bool nuModded ){ modded = nuModded; } + std::string_view bodyWgt::getComment(){ return comment; } + std::string_view bodyWgt::getValS(){ return valS; } + double bodyWgt::getValD(){ return valD; } + bodyWgt::bodyWgt() : xmlNode(){ return; } + bodyWgt::bodyWgt( std::string_view value ) : xmlNode() { setVal( value ); modded = false; } + bodyWgt::bodyWgt( double value ) : xmlNode() { setVal( value ); modded = false; } + bodyWgt::bodyWgt( std::string_view value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } + bodyWgt::bodyWgt( double value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } + bodyWgt::bodyWgt( std::string_view value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } + bodyWgt::bodyWgt( double value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } + bodyWgt::bodyWgt( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) : xmlNode( originFile, begin, childs ){ auto strtPt = originFile.find_first_not_of(" >+", originFile.find(">", begin)+1); valS = originFile.substr( strtPt, originFile.find(" ", strtPt) - strtPt ); valD = std::stod( valS ); } - bodyWgt( xmlNode& wgtNode ) : 
xmlNode( wgtNode ){ + bodyWgt::bodyWgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ parser( true ); valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); valD = std::stod( valS ); } - bodyWgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + bodyWgt::bodyWgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ parser( true ); valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); valD = std::stod( valS ); } - bodyWgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ + bodyWgt::bodyWgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ parser( true ); valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); valD = std::stod( valS ); } - bodyWgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + bodyWgt::bodyWgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ parser( true ); valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); valD = std::stod( valS ); } - bodyWgt( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ + bodyWgt::bodyWgt( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ parser( true ); valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); valD = std::stod( valS ); } - bodyWgt( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ + bodyWgt::bodyWgt( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ parser( true ); valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); valD = std::stod( valS ); } - bodyWgt( double value, std::string& idTag ){ + bodyWgt::bodyWgt( double value, std::string& idTag ){ setVal( value ); id = idTag; addTag( std::make_shared("id",id) ); } - void appendWgt( std::shared_ptr document ){ + void bodyWgt::appendWgt( std::shared_ptr document ){ if( !isWritten() ){ fullWriter(); } *document += *writtenSelf; } - void appendWgt( std::string* document ){ + void bodyWgt::appendWgt( std::string* document ){ if( 
!isWritten() ){ fullWriter(); } *document += *writtenSelf; } - std::shared_ptr appendWgt( std::string_view document ){ + std::shared_ptr bodyWgt::appendWgt( std::string_view document ){ if(!isWritten() ){ fullWriter(); } auto retDoc = std::make_shared( document ); *retDoc += *writtenSelf; return retDoc; } - protected: - std::string_view comment; - std::string valS; - std::string id; - double valD; - void fullWriter() override { + void bodyWgt::fullWriter() { writtenSelf = std::make_shared( "getId()) + "=\"" + std::string(tag->getVal()) + "\""; @@ -1152,7 +1092,6 @@ namespace REX modded = false; written = true; } - }; // ZW: fcn for finding the next block in SLHA format // parameter cards @@ -1199,30 +1138,29 @@ namespace REX // ZW: struct for handling the first line of // LHE format event block - struct evHead { - public: - std::string_view getComment(){ return comment; } - std::string_view getWeight(){ return weight; } - std::string_view getScale(){ return scale; } - std::string_view getAQED(){ return aqed; } - std::string_view getAQCD(){ return aqcd; } - std::string_view getNprt(){ return nprt; } - std::string_view getProcID(){ return procid; } - bool isModded(){ return modded; } - bool isWritten(){ return written; } - void setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } - void setWeight( std::string_view nuWgt ){ modded = true; weight = nuWgt; } - void setScale( std::string_view nuScale ){ modded = true; scale = nuScale; } - void setAQED( std::string_view nuAQED ){ modded = true; aqed = nuAQED; } - void setAQCD( std::string_view nuAQCD ){ modded = true; aqcd = nuAQCD; } - void setNprt( std::string_view nuNprt ){ modded = true; nprt = nuNprt; } - void setProcID( std::string_view nuProcID ){ modded = true; procid = nuProcID; } - std::shared_ptr getContent(){ + std::string_view evHead::getComment(){ return comment; } + std::string_view evHead::getWeight(){ return weight; } + std::string_view evHead::getScale(){ return scale; } + 
std::string_view evHead::getAQED(){ return aqed; } + std::string_view evHead::getAQCD(){ return aqcd; } + std::string_view evHead::getNprt(){ return nprt; } + std::string_view evHead::getProcID(){ return procid; } + bool evHead::isModded(){ return modded; } + bool evHead::isWritten(){ return written; } + void evHead::setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } + void evHead::setWeight( std::string_view nuWgt ){ modded = true; weight = nuWgt; } + void evHead::setScale( std::string_view nuScale ){ modded = true; scale = nuScale; } + void evHead::setAQED( std::string_view nuAQED ){ modded = true; aqed = nuAQED; } + void evHead::setAQCD( std::string_view nuAQCD ){ modded = true; aqcd = nuAQCD; } + void evHead::setNprt( std::string_view nuNprt ){ modded = true; nprt = nuNprt; } + void evHead::setNprt( int nuNprt ){ modded = true; nprtint = nuNprt; nprtstr = std::to_string(nuNprt); nprt = nprtstr;} + void evHead::setProcID( std::string_view nuProcID ){ modded = true; procid = nuProcID; } + std::shared_ptr evHead::getContent(){ if( !isWritten() || isModded() ){ writer(); } return content; } - evHead(){ return; } - evHead( const std::string_view originFile, size_t beginLine = 0, size_t endLine = npos ) + evHead::evHead(){ return; } + evHead::evHead( const std::string_view originFile, size_t beginLine, size_t endLine ) { if( originFile.size() == 0){ return; } beginLine = originFile.find_first_not_of("\n \r\f\t\v", beginLine); @@ -1236,19 +1174,7 @@ namespace REX aqed = evLine->at(4); aqcd = evLine->at(5); } - protected: - std::shared_ptr content; - std::string_view sourceFile; - std::string_view comment; - std::string_view weight; - std::string_view scale; - std::string_view aqed; - std::string_view aqcd; - std::string_view nprt; - std::string_view procid; - bool modded = false; - bool written = false; - void writer(){ + void evHead::writer(){ if( isWritten() && !isModded() ){ return; } if( !isModded() ){ content = std::make_shared( sourceFile ); 
return; } auto retText = std::make_shared( " " ); @@ -1260,49 +1186,46 @@ namespace REX modded = false; written = true; } - }; // ZW: struct for handling particle lines // in LHE format event block - struct lhePrt{ - public: - std::string_view getLine(){ return sourceFile; } - std::string_view getComment(){ return comment; } - std::vector getMom(){ return std::vector( std::begin( mom ), std::end( mom ) ); } - std::string_view getE(){ return energy; } - std::string_view getMass(){ return mass; } - std::string_view getVTim(){ return vtim; } - std::string_view getSpin(){ return spin; } - std::string_view getPDG(){ return pdg; } - std::string_view getStatus(){ return status; } - std::vector getMothers(){ return std::vector( std::begin( mothers ), std::end( mothers ) ); } - std::vector getColor(){ return std::vector( std::begin( icol ), std::end( icol ) ); } - void setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } - void setMom( std::vector nuMom ){ modded = true; mom[0] = nuMom[0]; mom[1] = nuMom[1]; mom[2] = nuMom[2]; } - void setEnergy( std::string_view nuE ){ modded = true; energy = nuE; } - void setMass( std::string_view nuM ){ modded = true; mass = nuM; } - void setVTim( std::string_view nuVTim ){ modded = true; vtim = nuVTim; } - void setSpin( std::string_view nuSpin ){ modded = true; spin = nuSpin; } - void setPDG( std::string_view nuPDG ){ modded = true; pdg = nuPDG; } - void setStatus( std::string_view nuSt ){ modded = true; status = nuSt; } - void setMothers( std::vector nuMum ){ modded = true; mothers[0] = nuMum[0]; mothers[1] = nuMum[1]; } - void setColors( std::vector nuCol ){ modded = true; icol[0] = nuCol[0]; icol[1] = nuCol[1]; } - bool isModded(){ return modded; } - bool isWritten(){ return written; } - std::shared_ptr getContent(){ + std::string_view lhePrt::getLine(){ return sourceFile; } + std::string_view lhePrt::getComment(){ return comment; } + std::vector lhePrt::getMom(){ return std::vector( std::begin( mom ), std::end( 
mom ) ); } + std::string_view lhePrt::getE(){ return energy; } + std::string_view lhePrt::getMass(){ return mass; } + std::string_view lhePrt::getVTim(){ return vtim; } + std::string_view lhePrt::getSpin(){ return spin; } + std::string_view lhePrt::getPDG(){ return pdg; } + std::string_view lhePrt::getStatus(){ return status; } + std::vector lhePrt::getMothers(){ return std::vector( std::begin( mothers ), std::end( mothers ) ); } + std::vector lhePrt::getColor(){ return std::vector( std::begin( icol ), std::end( icol ) ); } + void lhePrt::setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } + void lhePrt::setMom( std::vector nuMom ){ modded = true; mom[0] = nuMom[0]; mom[1] = nuMom[1]; mom[2] = nuMom[2]; } + void lhePrt::setEnergy( std::string_view nuE ){ modded = true; energy = nuE; } + void lhePrt::setMass( std::string_view nuM ){ modded = true; mass = nuM; } + void lhePrt::setVTim( std::string_view nuVTim ){ modded = true; vtim = nuVTim; } + void lhePrt::setSpin( std::string_view nuSpin ){ modded = true; spin = nuSpin; } + void lhePrt::setPDG( std::string_view nuPDG ){ modded = true; pdg = nuPDG; } + void lhePrt::setStatus( std::string_view nuSt ){ modded = true; status = nuSt; } + void lhePrt::setMothers( std::vector nuMum ){ modded = true; mothers[0] = nuMum[0]; mothers[1] = nuMum[1]; } + void lhePrt::setColors( std::vector nuCol ){ modded = true; icol[0] = nuCol[0]; icol[1] = nuCol[1]; } + bool lhePrt::isModded(){ return modded; } + bool lhePrt::isWritten(){ return written; } + std::shared_ptr lhePrt::getContent(){ if( !isWritten() || isModded() ){ writer(); } return content; } - lhePrt(){ return; } - // lhePrt( std::pair prtInfo ){ - // status = std::to_string( prtInfo.first ); - // pdg = std::to_string( prtInfo.second ); - // } - lhePrt( std::pair& prtInfo ){ + lhePrt::lhePrt(){ return; } + lhePrt::lhePrt( std::pair& prtInfo ){ status = std::to_string( prtInfo.first ); pdg = std::to_string( prtInfo.second ); } - lhePrt( const 
std::string_view originFile, const size_t& beginLine = 0, const size_t& endLine = npos ) + lhePrt::lhePrt( std::pair& prtInfo ){ + status = std::string_view( prtInfo.first ); + pdg = std::string_view( prtInfo.second ); + } + lhePrt::lhePrt( const std::string_view originFile, const size_t& beginLine, const size_t& endLine ) { sourceFile = originFile.substr( beginLine, endLine - beginLine ); auto evLine = nuWordSplitter( sourceFile ); @@ -1319,22 +1242,7 @@ namespace REX spin = evLine->at(12); if( evLine->size() > 13 ){ comment = sourceFile.substr( sourceFile.find( "#" ) ); } } - protected: - std::shared_ptr content; - std::string_view sourceFile; - std::string_view comment; - std::string_view mom[3]; - std::string_view energy; - std::string_view mass; - std::string_view vtim; - std::string_view spin; - std::string_view pdg; - std::string_view status; - std::string_view mothers[2]; - std::string_view icol[2]; - bool modded = false; - bool written = false; - void writer(){ + void lhePrt::writer(){ if( isWritten() && !isModded() ){ return; } if( !isModded() ){ content = std::make_shared( sourceFile ); return; } *content = ""; @@ -1349,26 +1257,23 @@ namespace REX modded = false; written = true; } - }; // ZW: struct for handling LHE format event block - struct event : public xmlNode { - public: - evHead getHead(){ return header; } - std::vector> getPrts(){ return prts; } - std::vector> getWgts(){ return rwgt; } - void setHead( evHead head ){ modded = true; header = head; } - void addPrt( std::shared_ptr prtcl ){ modded = true; prts.push_back( prtcl ); } - void addPrt( lhePrt prtcl ){ modded = true; prts.push_back( std::make_shared(prtcl) ); } - void setPrts( std::vector> prtcls ){ modded = true; prts = prtcls; } - void addWgt( bodyWgt nuWgt ){ addedWgt = true; rwgt.push_back( std::make_shared(nuWgt) ); } - void addWgt( std::shared_ptr nuWgt ){ modded = true; rwgt.push_back( nuWgt ); } - void addWgt( bodyWgt nuWgt, std::string& id ){ addedWgt = true; nuWgt.setId( id ); 
rwgt.push_back( std::make_shared(nuWgt) ); } - void addWgt( std::shared_ptr nuWgt, std::string& id ){ modded = true; nuWgt->setId( id ); rwgt.push_back( nuWgt ); } - bool newWeight(){ return addedWgt; } - int getNprt(){ return prts.size(); } - bool isModded() override{ return modded; } - bool isModded( bool deep ) override { + evHead event::getHead(){ return header; } + std::vector> event::getPrts(){ return prts; } + std::vector> event::getWgts(){ return rwgt; } + void event::setHead( evHead head ){ modded = true; header = head; } + void event::addPrt( std::shared_ptr prtcl ){ modded = true; prts.push_back( prtcl ); } + void event::addPrt( lhePrt prtcl ){ modded = true; prts.push_back( std::make_shared(prtcl) ); } + void event::setPrts( std::vector> prtcls ){ modded = true; prts = prtcls; } + void event::addWgt( bodyWgt nuWgt ){ addedWgt = true; rwgt.push_back( std::make_shared(nuWgt) ); } + void event::addWgt( std::shared_ptr nuWgt ){ modded = true; rwgt.push_back( nuWgt ); } + void event::addWgt( bodyWgt nuWgt, std::string& id ){ addedWgt = true; nuWgt.setId( id ); rwgt.push_back( std::make_shared(nuWgt) ); } + void event::addWgt( std::shared_ptr nuWgt, std::string& id ){ modded = true; nuWgt->setId( id ); rwgt.push_back( nuWgt ); } + bool event::newWeight(){ return addedWgt; } + int event::getNprt(){ return prts.size(); } + bool event::isModded() { return modded; } + bool event::isModded( bool deep ) { if( !deep ){ return modded; } bool modStat = modded; for( auto child : children ){ if(modStat){ return modStat; }; modStat = (modStat || child->isModded( deep )); } @@ -1377,18 +1282,24 @@ namespace REX for( auto wgt : rwgt ){ if(modStat){ return modStat; }; modStat = (modStat || wgt->isModded()); } return modStat; } - event(){ return; } - event( std::vector>& prtInfo ){ + event::event(){ return; } + event::event( std::vector>& prtInfo ){ header.setNprt( std::to_string( prtInfo.size() ) ); for( auto& prt : prtInfo ){ prts.push_back( std::make_shared( prt ) ); } } 
- event( std::vector> prtInfo ){ + event::event( std::vector>& prtInfo ){ + header.setNprt( prtInfo.size() ); + for( auto& prt : prtInfo ){ + prts.push_back( std::make_shared( prt ) ); + } + } + event::event( std::vector> prtInfo ){ header.setNprt( std::to_string( prtInfo.size() ) ); prts = prtInfo; } - event( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + event::event( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) : xmlNode(originFile, begin, childs) { xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" \n\r\f\t\v", begin+1); if( trueStart == npos ){ return; } @@ -1400,7 +1311,7 @@ namespace REX prts.push_back( std::make_shared(originFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart + 1) ); } } - event( const xmlNode& originFile ) + event::event( const xmlNode& originFile ) : xmlNode( originFile ) { size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", start+1); auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); @@ -1411,7 +1322,7 @@ namespace REX prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); } } - event( const xmlNode* originFile ) + event::event( const xmlNode* originFile ) : xmlNode( *originFile ) { size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); @@ -1422,7 +1333,7 @@ namespace REX prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); } } - event( const std::shared_ptr& originFile ) + event::event( const std::shared_ptr& originFile ) : xmlNode( *originFile ) { size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); auto vals = lineFinder( xmlFile.substr( trueStart, 
xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); @@ -1433,7 +1344,7 @@ namespace REX prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); } } - event( xmlTree& originFile ) + event::event( xmlTree& originFile ) : xmlNode( originFile ) { size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); @@ -1444,7 +1355,7 @@ namespace REX prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); } } - event( xmlTree* originFile ) + event::event( xmlTree* originFile ) : xmlNode( *originFile ) { size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); @@ -1455,7 +1366,7 @@ namespace REX prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); } } - event( std::shared_ptr originFile ) + event::event( std::shared_ptr originFile ) : xmlNode( *originFile ) { size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); @@ -1466,39 +1377,28 @@ namespace REX prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); } } - bool prtsAreMod(){ + bool event::prtsAreMod(){ for( auto prt : prts ){ if( prt->isModded() ){ return true; } } return false; } - bool headIsMod(){ + bool event::headIsMod(){ return header.isModded(); } - bool isSpecSort() const { return specSorted; } - sortFcn getSortFcn() const { return eventSort; } - statSort getStatSort() const { return specSort; } - protected: - std::vector> rwgt; - std::shared_ptr childRwgt; - bool hasRwgt(){ + bool event::isSpecSort() const { return specSorted; 
} + sortFcn event::getSortFcn() const { return eventSort; } + statSort event::getStatSort() const { return specSort; } + bool event::hasRwgt(){ if( rwgt.size() > 0 ){ return true; } return false; } - bool rwgtChild(){ + bool event::rwgtChild(){ if( childRwgt != nullptr ){ return true; } for( auto child : children ){ if( clStringComp(child->getName(), std::string("rwgt") ) ){ childRwgt = child; return true; } } return false; } - bool bothRwgt(){ return (hasRwgt() && rwgtChild() ); } - bool eitherRwgt(){ return (hasRwgt() || rwgtChild() ); } - evHead header; - bool hasBeenProc = false; - std::vector> prts; - std::map> procMap; - std::map> procOrder; - sortFcn eventSort = []( std::vector vec ){ return stodSort( vec ); }; - statSort specSort = []( std::string_view stat, std::vector vec ){ return stodSort( vec ); }; - bool specSorted = false; - bool initProcMap(bool hard = false) + bool event::bothRwgt(){ return (hasRwgt() && rwgtChild() ); } + bool event::eitherRwgt(){ return (hasRwgt() || rwgtChild() ); } + bool event::initProcMap(bool hard) { if(!hard){ if( procMap.size() > 0 ){ return true; } } for( auto prt : prts ){ @@ -1514,7 +1414,7 @@ namespace REX hasBeenProc = true; return true; } - bool initProcMap( sortFcn sorter, bool hard = false ) + bool event::initProcMap( sortFcn sorter, bool hard ) { if(!hard){ if( procMap.size() > 0 ){ return true; } } specSorted = false; @@ -1532,7 +1432,7 @@ namespace REX hasBeenProc = true; return true; } - bool initProcMap( statSort sorter, bool hard = false ) + bool event::initProcMap( statSort sorter, bool hard ) { if(!hard){ if( procMap.size() > 0 ){ return true; } } specSorted = true; @@ -1550,29 +1450,29 @@ namespace REX hasBeenProc = true; return true; } - bool inRwgtChild( std::string_view name ){ + bool event::inRwgtChild( std::string_view name ){ for( auto child : childRwgt->getChildren() ){ for( auto tag : child->getTags() ){ if(clStringComp(tag->getVal(), name)){ return true; } } } return false; } - bool 
checkRwgtOverlap(){ + bool event::checkRwgtOverlap(){ for( auto wgt : rwgt ){ for( auto tag : wgt->getTags() ){ if( inRwgtChild( tag->getVal() ) ){ return true; } } } return false; } - void childRwgtWriter(){ + void event::childRwgtWriter(){ if( rwgtChild() ){ nodeContent += *childRwgt->nodeWriter(); } } - void vecRwgtWriter( bool midNode = false ){ + void event::vecRwgtWriter( bool midNode ){ if( !midNode ){ nodeContent += "\n"; } for( auto wgt : rwgt ){ nodeContent += *wgt->nodeWriter(); } nodeContent += "\n"; } - void rwgtWriter(){ + void event::rwgtWriter(){ if( bothRwgt() ){ if( checkRwgtOverlap() ){ childRwgtWriter(); return; } childRwgtWriter(); nodeContent.erase( nodeContent.size() - 8, 8 ); @@ -1583,20 +1483,19 @@ namespace REX if( rwgtChild() ){ childRwgtWriter(); return; } } } - void contWriter() override { + void event::contWriter() { nodeContent = "\n" + *header.getContent(); for( auto prt : prts ){ nodeContent += *prt->getContent(); } } - void childWriter() override { + void event::childWriter() { for( auto child : children ){ if( clStringComp( child->getName(), std::string("wgt") ) ){ continue; } nodeContent += *child->nodeWriter(); } } - bool addedWgt = false; - void fullWriter() override { + void event::fullWriter() { if( isModded( false ) ){ headWriter(); contWriter(); @@ -1610,7 +1509,7 @@ namespace REX written = true; } } - void fullWriter( bool deep ){ + void event::fullWriter( bool deep ){ if( !deep ){ fullWriter(); return; } if( isModded( true ) ){ headWriter(); @@ -1626,7 +1525,7 @@ namespace REX written = true; } } - void appendWgts(){ + void event::appendWgts(){ if( !addedWgt ){ return; } writtenSelf->erase( writtenSelf->size() - 17, 17 ); for( auto wgt : rwgt ){ @@ -1634,67 +1533,66 @@ namespace REX } *writtenSelf += "\n
\n"; } - public: - std::shared_ptr nodeWriter() override { + std::shared_ptr event::nodeWriter() { if( isModded(false) || !isWritten() ){ fullWriter(); return writtenSelf; } if( addedWgt ){ appendWgts(); } return writtenSelf; } - std::shared_ptr nodeWriter( bool recursive ){ + std::shared_ptr event::nodeWriter( bool recursive ){ if( isModded( recursive ) || !isWritten() ){ fullWriter(); return writtenSelf; } if( addedWgt ){ appendWgts(); } return writtenSelf; } - auto &getProc(){ + std::map> &event::getProc(){ if( initProcMap() ){ return procMap; } else throw std::runtime_error("Error while parsing event node."); } - auto &getProcOrder(){ + std::map> &event::getProcOrder(){ if( initProcMap() ){ return procOrder; } else throw std::runtime_error("Error while parsing event node."); } - auto &getProc() const{ + std::map> event::getProc() const { if ( hasBeenProc ){ return procMap; } else throw std::runtime_error("Const declaration of event node before it has been procesed."); } - auto &getProcOrder() const{ + std::map> event::getProcOrder() const { if ( hasBeenProc ){ return procOrder; } else throw std::runtime_error("Const declaration of event node before it has been procesed."); } - auto &getProc(sortFcn sorter){ + std::map> &event::getProc(sortFcn sorter){ if( initProcMap(sorter) ){ return procMap; } else throw std::runtime_error("Error while parsing event node."); } - auto &getProcOrder(sortFcn sorter){ + std::map> &event::getProcOrder(sortFcn sorter){ if( initProcMap(sorter) ){ return procOrder; } else throw std::runtime_error("Error while parsing event node."); } - auto &getProc(statSort sorter){ + std::map> &event::getProc(statSort sorter){ if( initProcMap(sorter) ){ return procMap; } else throw std::runtime_error("Error while parsing event node."); } - auto &getProcOrder(statSort sorter){ + std::map> &event::getProcOrder(statSort sorter){ if( initProcMap(sorter) ){ return procOrder; } else throw std::runtime_error("Error while parsing event node."); } - }; + + 
event& makeEv( std::vector>& particles ){ + auto returnEvent = event( particles ); + return returnEvent; + } + + std::vector> getParticles( event& ev ){ + return ev.getPrts(); + } // ZW: struct for handling the first line of // LHE format init tag - struct lheInitHead{ - public: - std::string_view idbmup[2]; - std::string_view ebmup[2]; - std::string_view pdfgup[2]; - std::string_view pdfsup[2]; - std::string_view idwtup; - std::string_view nprup; - bool isWritten(){ return written; } - bool isModded(){ return modded; } - std::shared_ptr getContent(){ + bool lheInitHead::isWritten(){ return written; } + bool lheInitHead::isModded(){ return modded; } + std::shared_ptr lheInitHead::getContent(){ if( isModded() || !isWritten() ){ writer(); } return content; } - lheInitHead( std::string_view initHead ){ + lheInitHead::lheInitHead( std::string_view initHead ){ auto vals = *nuBlankSplitter( initHead ); if( vals.size() < 10 ){ return; } idbmup[0] = vals[0]; idbmup[1] = vals[1]; @@ -1703,7 +1601,7 @@ namespace REX pdfsup[0] = vals[6]; pdfsup[1] = vals[7]; idwtup = vals[8]; nprup = vals[9]; } - lheInitHead( xmlNode& initNode ) + lheInitHead::lheInitHead( xmlNode& initNode ) { if( initNode.getName() != "init" ){ return; } auto startPos = initNode.getFile().find( ">", initNode.getStart() ) + 1; @@ -1715,33 +1613,22 @@ namespace REX pdfsup[0] = vals[6]; pdfsup[1] = vals[7]; idwtup = vals[8]; nprup = vals[9]; } - protected: - std::shared_ptr content; - bool written = false; - bool modded = false; - void writer(){ + void lheInitHead::writer(){ *content = std::string(idbmup[0]) + " " + std::string(idbmup[1]) + " " + std::string(ebmup[0]) + " " + std::string(ebmup[1]) + " " + std::string(pdfgup[0]) + " " + std::string(pdfgup[1]) + " " + std::string(pdfsup[0]) + " " + std::string(pdfsup[1]) + " " + std::string(idwtup) + " " + std::string(nprup) +"\n"; written = true; modded = false; } - }; // ZW: struct for handling process lines // in LHE format init tag - struct lheInitLine { - 
public: - std::string_view xsecup; - std::string_view xerrup; - std::string_view xmaxup; - std::string_view lprup; - bool isWritten(){ return written; } - bool isModded(){ return modded; } - std::shared_ptr getContent(){ + bool lheInitLine::isWritten(){ return written; } + bool lheInitLine::isModded(){ return modded; } + std::shared_ptr lheInitLine::getContent(){ if( isModded() || !isWritten() ){ writer(); } return content; } - lheInitLine(){} - lheInitLine( std::string_view procLine ) + lheInitLine::lheInitLine(){} + lheInitLine::lheInitLine( std::string_view procLine ) { auto vals = *nuBlankSplitter( procLine ); if( vals.size() < 4 ){ return; } @@ -1750,33 +1637,20 @@ namespace REX xmaxup = vals[2]; lprup = vals[3]; } - protected: - std::shared_ptr content; - bool written = false; - bool modded = false; - void writer(){ + void lheInitLine::writer(){ *content = std::string(xsecup) + " " + std::string(xerrup) + " " + std::string(xmaxup) + " " + std::string(lprup) + "\n"; written = true; modded = false; } - }; // ZW: struct for handling single parameter line in // SLHA format parameter card - struct paramVal{ - public: - double value = 0; - int id = 0; - std::string_view realLine; - std::string_view comment; - std::string_view idStr; - std::string_view valStr; - virtual void parse(){ + void paramVal::parse(){ id = std::stoi( std::string(idStr) ); value = std::stod( std::string(valStr) ); } - paramVal(){ realLine = ""; idStr = ""; valStr = ""; } - paramVal( std::string_view paramLine, bool parseOnline = false ) + paramVal::paramVal(){ realLine = ""; idStr = ""; valStr = ""; } + paramVal::paramVal( std::string_view paramLine, bool parseOnline ) { if( paramLine.find("\n") != npos ){ auto startPos = paramLine.find_first_not_of(" \n", paramLine.find("\n")); @@ -1800,9 +1674,8 @@ namespace REX } parse(); } } - bool isMod(){ return modded; } - bool modded = false; - virtual std::shared_ptr selfWrite(){ + bool paramVal::isMod(){ return modded; } + std::shared_ptr 
paramVal::selfWrite(){ auto writeVal = std::make_shared(""); if( isMod() ) { @@ -1816,13 +1689,10 @@ namespace REX else{ *writeVal = std::string( realLine ) + "\n"; } return writeVal; } - }; // ZW: struct for handling single DECAY line // in SLHA format parameter card - struct decVal : public paramVal{ - public: - void parse() override { + void decVal::parse() { auto vals = *nuBlankSplitter( realLine ); id = std::stoi( std::string(vals[1]) ); value = std::stod( std::string(vals[2]) ); @@ -1832,11 +1702,11 @@ namespace REX comment = realLine.substr( comStart, realLine.find("\n", comStart) - comStart ); } } - decVal( std::string_view paramLine = "", bool parseOnline = false ) : paramVal( paramLine, false ) + decVal::decVal( std::string_view paramLine, bool parseOnline ) : paramVal( paramLine, false ) { if( parseOnline ){ parse(); } } - std::shared_ptr selfWrite() override { + std::shared_ptr decVal::selfWrite() { auto writeVal = std::make_shared(""); if( isMod() ) { @@ -1849,19 +1719,10 @@ namespace REX else{ *writeVal = std::string( realLine ) + "\n"; } return writeVal; } - }; // ZW: struct for handling parameter block // in SLHA format parameter card - struct paramBlock { - public: - std::string_view realBlock; - size_t startPt; - std::string_view comment; - std::string_view initComm; - std::string_view name; - std::vector params; - virtual void parse( bool parseOnline = false ){ + void paramBlock::parse( bool parseOnline ){ if( realBlock.size() == 0 ){ return; } if( !(clStringComp(realBlock.substr(startPt+1, 5), std::string("block"))) ){ startPt = clStringFind( realBlock, std::string("\nblock") ); } auto namePt = realBlock.find_first_not_of( " ", startPt + 7 ); @@ -1875,16 +1736,15 @@ namespace REX params.push_back( paramVal( line, parseOnline ) ); } } - paramBlock(){ return; } - paramBlock( std::string_view paramSet, bool parseOnline = false ) + paramBlock::paramBlock(){ return; } + paramBlock::paramBlock( std::string_view paramSet, bool parseOnline ) { realBlock 
= paramSet; startPt = clStringFind( realBlock, std::string("\nB") ); if( parseOnline ){ parse(parseOnline); } } - bool isMod(){ return modded; } - bool modded = false; - virtual std::shared_ptr selfWrite(){ + bool paramBlock::isMod(){ return modded; } + std::shared_ptr paramBlock::selfWrite(){ auto writeBlock = std::make_shared(""); if( isMod() ) { @@ -1905,14 +1765,10 @@ namespace REX } } return writeBlock; } - }; // ZW: struct for handling DECAY lines // in SLHA format parameter card - struct decBlock : public paramBlock { - public: - std::vector decays; - void parse( bool parseOnline = false ) override{ + void decBlock::parse( bool parseOnline ){ if( realBlock.size() == 0 ){ return; } auto decLines = clFindEach( realBlock, std::string("\ndecay") ); decays.reserve(decLines->size()); @@ -1925,7 +1781,7 @@ namespace REX decays.push_back( decVal( realBlock.substr( pts + 1, lineBr - pts - 1 ), parseOnline ) ); } } - void parse( std::shared_ptr> decLines, bool parseOnline = false ) { + void decBlock::parse( std::shared_ptr> decLines, bool parseOnline ) { decays.reserve(decLines->size()); if( realBlock.size() > 5 ){ if( clStringComp( realBlock.substr(0,5), std::string("decay")) ) { decays.push_back( decVal(realBlock.substr( 0, realBlock.find("\n") ), parseOnline) ); } } @@ -1936,12 +1792,12 @@ namespace REX decays.push_back( decVal( realBlock.substr( pts + 1, lineBr - pts - 1 ), parseOnline ) ); } } - decBlock( std::string_view paramSet = "", bool parseOnline = false ) : paramBlock( paramSet, parseOnline ) + decBlock::decBlock( std::string_view paramSet, bool parseOnline ) : paramBlock( paramSet, parseOnline ) { realBlock = paramSet; if( parseOnline ){ parse(parseOnline); } } - std::shared_ptr selfWrite() override { + std::shared_ptr decBlock::selfWrite() { auto writeBlock = std::make_shared(""); *writeBlock += "\n"; for ( auto val : decays ) @@ -1950,27 +1806,9 @@ namespace REX } return writeBlock; } - }; // ZW: struct for handling SLHA parameter cards - struct 
lesHouchesCard { - public: - decBlock decays; - std::string_view xmlFile; - size_t start; - size_t end; - bool modded; - bool parsed; - std::string_view header; - std::vector blocks; - size_t blockStart; - std::function lambda = [&]( size_t& conPt, const std::string_view& file ) - { return !( file[conPt+1] == ' ' || file[conPt+1] == '#' || file[conPt+1] == '\n' ); }; - std::function lambdaNu = [&]( size_t& conPt, const std::string_view& file ) - { return !( file[conPt+1] == ' ' || file[conPt+1] == '\n' || file[conPt+1] == '<'); }; - std::function lambdaD = [&]( size_t& conPt, const std::string_view& file ) - { return !( clStringComp(file.substr(conPt+1, 1), std::string("d") ) ); }; - void parse( bool parseOnline = false ) + void lesHouchesCard::parse( bool parseOnline ) { if( parsed ){ return; } if( xmlFile.substr(start,1).find_first_of("BbDd#") == npos ){ start = clStringFindIf( xmlFile, std::string("\n"), lambdaNu ); } @@ -1987,14 +1825,14 @@ namespace REX decays.parse( decLines, parseOnline ); parsed = true; } - lesHouchesCard( const std::string_view originFile = "", const size_t& begin = 0, bool parseOnline = false ){ + lesHouchesCard::lesHouchesCard( const std::string_view originFile, const size_t& begin, bool parseOnline ){ xmlFile = originFile; start = begin; modded = false; blockStart = clStringFindIf( xmlFile, std::string("\n"), lambda, start + 1); end = xmlFile.find(" selfWrite(){ + bool lesHouchesCard::isMod(){ return modded; } + std::shared_ptr lesHouchesCard::selfWrite(){ auto writeCard = std::make_shared(header); if( isMod() ) { for( auto block : blocks ) @@ -2006,106 +1844,94 @@ namespace REX } return writeCard; } - }; - struct slhaNode : public xmlNode { - public: - std::shared_ptr getParameters(){ + std::shared_ptr slhaNode::getParameters(){ modded = true; return parameterCard; } - slhaNode() : xmlNode(){} - slhaNode( lesHouchesCard parameters ) : xmlNode(){ + slhaNode::slhaNode() : xmlNode(){} + slhaNode::slhaNode( lesHouchesCard parameters ) : 
xmlNode(){ parameterCard = std::make_shared( parameters ); pCardInit = true; } - slhaNode( std::shared_ptr parameters ) : xmlNode(){ + slhaNode::slhaNode( std::shared_ptr parameters ) : xmlNode(){ parameterCard = parameters; pCardInit = true; } - slhaNode( xmlNode& node, bool parseOnline = false ) : xmlNode( node ){ + slhaNode::slhaNode( xmlNode& node, bool parseOnline ) : xmlNode( node ){ parameterCard = std::make_shared( node.getFile(), node.getStart(), parseOnline ); } - slhaNode( xmlNode* node, bool parseOnline = false ) : xmlNode( *node ){ + slhaNode::slhaNode( xmlNode* node, bool parseOnline ) : xmlNode( *node ){ parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); } - slhaNode( std::shared_ptr node, bool parseOnline = false ) : xmlNode( *node ){ + slhaNode::slhaNode( std::shared_ptr node, bool parseOnline ) : xmlNode( *node ){ parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); } - slhaNode( xmlTree tree, bool parseOnline = false ) : xmlNode( tree ){ + slhaNode::slhaNode( xmlTree tree, bool parseOnline ) : xmlNode( tree ){ parameterCard = std::make_shared( tree.getOrigin(), tree.getStart(), parseOnline ); } - slhaNode( std::shared_ptr tree, bool parseOnline = false ) : xmlNode( *tree ){ + slhaNode::slhaNode( std::shared_ptr tree, bool parseOnline ) : xmlNode( *tree ){ parameterCard = std::make_shared( tree->getOrigin(), tree->getStart(), parseOnline ); } - slhaNode( xmlTree* tree, bool parseOnline = false ) : xmlNode( *tree ){ + slhaNode::slhaNode( xmlTree* tree, bool parseOnline ) : xmlNode( *tree ){ parameterCard = std::make_shared( tree->getOrigin(), tree->getStart(), parseOnline ); } - slhaNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ) + slhaNode::slhaNode( const std::string_view originFile, const size_t& begin, bool parseOnline ) : xmlNode( originFile, begin ){ if( parse() ){ parameterCard = std::make_shared( content, begin, parseOnline ); 
pCardInit = true; } } - protected: - std::shared_ptr parameterCard; - bool pCardInit = false; - void headWriter() override{ + void slhaNode::headWriter(){ nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; } nodeHeader += ">"; } - void endWriter() override{ nodeEnd += "\n"; } - void contWriter() override{ + void slhaNode::endWriter(){ nodeEnd += "\n"; } + void slhaNode::contWriter(){ if( pCardInit ){ nodeContent = *parameterCard->selfWrite(); } else { nodeContent = content; } } - }; // ZW: struct for handling LHE init nodes - struct initNode : public xmlNode { - public: - std::shared_ptr getHead(){ return initHead; } - std::vector> getLines(){ return initLines; } - void setHead( std::shared_ptr head ){ modded = true; initHead = head; } - void setLines( std::vector> lines ){ modded = true; initLines = lines; initHead->nprup = std::to_string( initLines.size() ); } - void addLine( std::shared_ptr line ){ modded = true; initLines.push_back( line ); initHead->nprup = std::to_string( initLines.size() ); } - initNode() : xmlNode(){ name = "init"; } - initNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ) + std::shared_ptr initNode::getHead(){ return initHead; } + std::vector> initNode::getLines(){ return initLines; } + void initNode::setHead( std::shared_ptr head ){ modded = true; initHead = head; } + void initNode::setLines( std::vector> lines ){ modded = true; initLines = lines; initHead->nprup = std::to_string( initLines.size() ); } + void initNode::addLine( std::shared_ptr line ){ modded = true; initLines.push_back( line ); initHead->nprup = std::to_string( initLines.size() ); } + initNode::initNode() : xmlNode(){ name = "init"; } + initNode::initNode( const std::string_view originFile, const size_t& begin, bool parseOnline ) : xmlNode( originFile, begin ){ content = originFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); } - initNode( xmlNode& node, bool parseOnline = 
false ) : xmlNode( node ){ + initNode::initNode( xmlNode& node, bool parseOnline ) : xmlNode( node ){ content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } - initNode( xmlNode* node, bool parseOnline = false ) : xmlNode( *node ){ + initNode::initNode( xmlNode* node, bool parseOnline ) : xmlNode( *node ){ content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } - initNode( std::shared_ptr node, bool parseOnline = false ) : xmlNode( *node ){ + initNode::initNode( std::shared_ptr node, bool parseOnline ) : xmlNode( *node ){ content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } - initNode( xmlTree tree, bool parseOnline = false ) : xmlNode( tree ){ + initNode::initNode( xmlTree tree, bool parseOnline ) : xmlNode( tree ){ content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } - initNode( std::shared_ptr tree, bool parseOnline = false ) : xmlNode( *tree ){ + initNode::initNode( std::shared_ptr tree, bool parseOnline ) : xmlNode( *tree ){ content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } - initNode( xmlTree* tree, bool parseOnline = false ) : xmlNode( *tree ){ + initNode::initNode( xmlTree* tree, bool parseOnline ) : xmlNode( *tree ){ content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } - protected: - std::shared_ptr initHead; - std::vector> initLines; - bool parseContent() override{ + bool initNode::parseContent(){ if( content.size() == 0 ){ return false; } auto linebreaks = lineFinder( content ); if( 
linebreaks->size() == 0 ){ return false; } @@ -2115,19 +1941,16 @@ namespace REX } return true; } - void contWriter() override{ + void initNode::contWriter(){ if( isModded() ){nodeContent = std::string( content ); return; } nodeContent = *initHead->getContent(); for( auto line : initLines ){ nodeContent += *line->getContent(); } } - }; // ZW: struct for explicitly handling LHE header nodes - struct lheHead : public xmlNode { - public: - size_t addWgtGroup( std::shared_ptr& wgtGroup ){ + size_t lheHead::addWgtGroup( std::shared_ptr& wgtGroup ){ hasRwgt = true; modded = true; if( wgtGrpInit( wgtGroup ) ){ @@ -2135,7 +1958,7 @@ namespace REX } return (rwgtNodes->noGrps() - 1); } - size_t addWgtGroup( weightGroup wgtGroup ){ + size_t lheHead::addWgtGroup( weightGroup wgtGroup ){ hasRwgt = true; modded = true; auto wgtGrpPtr = std::make_shared( wgtGroup ); @@ -2144,21 +1967,21 @@ namespace REX } return (rwgtNodes->noGrps() - 1); } - void addWgt( size_t index, std::shared_ptr nuWgt ){ + void lheHead::addWgt( size_t index, std::shared_ptr nuWgt ){ if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." ); hasRwgt = true; modded = true; rwgtNodes->addWgt( index, nuWgt ); } - void addWgt( size_t index, headWeight nuWgt ){ + void lheHead::addWgt( size_t index, headWeight nuWgt ){ if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." ); hasRwgt = true; modded = true; rwgtNodes->addWgt( index, nuWgt ); } - void addWgt( size_t index, std::shared_ptr nuWgt, std::string idTagg ){ + void lheHead::addWgt( size_t index, std::shared_ptr nuWgt, std::string idTagg ){ if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." 
); hasRwgt = true; @@ -2166,7 +1989,7 @@ namespace REX nuWgt->setId( idTagg ); rwgtNodes->addWgt( index, nuWgt ); } - void addWgt( size_t index, headWeight nuWgt, std::string idTagg ){ + void lheHead::addWgt( size_t index, headWeight nuWgt, std::string idTagg ){ if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." ); hasRwgt = true; @@ -2174,15 +1997,15 @@ namespace REX nuWgt.setId( idTagg ); rwgtNodes->addWgt( index, nuWgt ); } - void setInitRwgt( initRwgt initWgt ){ hasRwgt = true; modded = true; rwgtNodes = std::make_shared(initWgt); } - void setInitRwgt( std::shared_ptr initWgt ){ hasRwgt = true; modded = true; rwgtNodes = initWgt; } - std::vector> getWgtGroups(){ return rwgtNodes->getGroups(); } - std::shared_ptr getInitRwgt(){ return rwgtNodes; } - std::shared_ptr getParameters(){ return parameters; } - void setParameters( std::shared_ptr params ){ parameters = params; } - bool rwgtInc(){ return hasRwgt; } - lheHead(){ return; } - lheHead( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + void lheHead::setInitRwgt( initRwgt initWgt ){ hasRwgt = true; modded = true; rwgtNodes = std::make_shared(initWgt); } + void lheHead::setInitRwgt( std::shared_ptr initWgt ){ hasRwgt = true; modded = true; rwgtNodes = initWgt; } + std::vector> lheHead::getWgtGroups(){ return rwgtNodes->getGroups(); } + std::shared_ptr lheHead::getInitRwgt(){ return rwgtNodes; } + std::shared_ptr lheHead::getParameters(){ return parameters; } + void lheHead::setParameters( std::shared_ptr params ){ parameters = params; } + bool lheHead::rwgtInc(){ return hasRwgt; } + lheHead::lheHead(){ return; } + lheHead::lheHead( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) : xmlNode(originFile, begin, childs){ xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); if( trueStart != npos 
){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} @@ -2191,45 +2014,43 @@ namespace REX if (child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - lheHead( xmlNode& node ) : xmlNode(node){ + lheHead::lheHead( xmlNode& node ) : xmlNode(node){ for( auto child : node.getChildren() ){ if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - lheHead( xmlNode* node ) : xmlNode(*node){ + lheHead::lheHead( xmlNode* node ) : xmlNode(*node){ for( auto child : node->getChildren() ){ if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - lheHead( std::shared_ptr node ) : xmlNode( *node ){ + lheHead::lheHead( std::shared_ptr node ) : xmlNode( *node ){ for( auto child : node->getChildren() ){ if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - lheHead( xmlTree tree ) : xmlNode( tree ){ + lheHead::lheHead( xmlTree tree ) : xmlNode( tree ){ for( auto child : children ){ if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - lheHead( std::shared_ptr tree ) : xmlNode( *tree ){ + lheHead::lheHead( std::shared_ptr tree ) : xmlNode( *tree ){ for( auto child : children ){ if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - lheHead( xmlTree* tree ) : xmlNode( *tree ){ + lheHead::lheHead( xmlTree* tree ) : xmlNode( *tree ){ for( auto child : children ){ if ( 
child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - protected: - bool wgtGrpIsInit = false; - bool wgtGrpInit( std::shared_ptr& wgtGrp ){ + bool lheHead::wgtGrpInit( std::shared_ptr& wgtGrp ){ if( wgtGrpIsInit ){ return true; } if( rwgtNodes == nullptr ){ rwgtNodes = std::make_shared(); @@ -2238,13 +2059,7 @@ namespace REX return false; } else throw std::runtime_error( "Error while initiating return LHE file header (initrwgt node is defined in an unrecognised manner)." ); } - std::shared_ptr parameters; - bool hasRwgt = false; - std::shared_ptr rwgtNodes; - std::vector> initrwgt; - bool relChildSet = false; - std::vector relChild; - void setRelChild(){ + void lheHead::setRelChild(){ if( relChildSet ){ return; } relChild.reserve( children.size() ); for( size_t k = 0 ; k < children.size() ; ++k ){ @@ -2255,7 +2070,7 @@ namespace REX } relChildSet = true; } - bool parseChildren( bool recursive ){ + bool lheHead::parseChildren( bool recursive ){ bool status = true; for( auto child : children ){ if( child->getName() == "slha" || child->getName() == "initrwgt" ){ continue; } @@ -2265,14 +2080,14 @@ namespace REX } return status; } - void headWriter() override{ + void lheHead::headWriter(){ nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; } nodeHeader += ">\n"; } - void childWriter() override{ + void lheHead::childWriter(){ setRelChild(); for( auto relKid : relChild ){ nodeContent += *(children[relKid]->nodeWriter()); @@ -2282,7 +2097,7 @@ namespace REX nodeContent += *rwgtNodes->nodeWriter(); } } - void fullWriter() override{ + void lheHead::fullWriter(){ if( isModded() ){ headWriter(); contWriter(); @@ -2292,20 +2107,14 @@ namespace REX written = true; } } - }; // ZW: struct for keeping track of appended weights in LHE node, // since weight information is stored both in the header // and in the individual events - struct 
newWgt{ - protected: - std::shared_ptr headWgt; - std::vector> bodyWgts; - public: - newWgt( std::shared_ptr heaWgt, std::vector> bodWgts ){ + newWgt::newWgt( std::shared_ptr heaWgt, std::vector> bodWgts ){ headWgt = heaWgt; bodyWgts = bodWgts; } - newWgt( std::shared_ptr heaWgt, std::shared_ptr> wgts ){ + newWgt::newWgt( std::shared_ptr heaWgt, std::shared_ptr> wgts ){ headWgt = heaWgt; bodyWgts = std::vector>(wgts->size()); auto idTag = std::string(headWgt->getTag()); @@ -2319,14 +2128,14 @@ namespace REX } } } - newWgt( std::string_view parameters, std::shared_ptr> wgts, std::string idTag = "rex_rwgt" ){ + newWgt::newWgt( std::string_view parameters, std::shared_ptr> wgts, std::string idTag ){ headWgt = std::make_shared(parameters, idTag); bodyWgts = std::vector>(wgts->size()); for( size_t i = 0 ; i < wgts->size() ; ++i ){ bodyWgts[i] = std::make_shared(wgts->at(i), idTag); } } - newWgt( std::string_view parameters, int idNum, std::shared_ptr> wgts, std::string idTag = "rex_rwgt" ){ + newWgt::newWgt( std::string_view parameters, int idNum, std::shared_ptr> wgts, std::string idTag ){ std::string newTag = std::string( idTag ) + "_" + std::to_string( idNum ); headWgt = std::make_shared(parameters, newTag); bodyWgts = std::vector>(wgts->size()); @@ -2334,15 +2143,15 @@ namespace REX bodyWgts[i] = std::make_shared(wgts->at(i), newTag); } } - newWgt( std::string& parameters ){ + newWgt::newWgt( std::string& parameters ){ headWgt = std::make_shared(parameters); } - newWgt( std::string& parameters, std::string& idTag ){ + newWgt::newWgt( std::string& parameters, std::string& idTag ){ headWgt = std::make_shared(parameters, idTag); } - std::shared_ptr getHeadWgt(){ return headWgt; } - std::vector> getBodyWgts(){ return bodyWgts; } - void addBdyWgts( std::shared_ptr> wgts ){ + std::shared_ptr newWgt::getHeadWgt(){ return headWgt; } + std::vector> newWgt::getBodyWgts(){ return bodyWgts; } + void newWgt::addBdyWgts( std::shared_ptr> wgts ){ auto idTag = 
std::string(headWgt->getTag()); if( idTag != "" ){ for( size_t i = 0 ; i < wgts->size() ; ++i ){ @@ -2354,13 +2163,10 @@ namespace REX } } } - }; // ZW: general struct for handling LHE files explicitly - struct lheNode : public xmlNode { - public: - lheNode() : xmlNode(){} - lheNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + lheNode::lheNode() : xmlNode(){} + lheNode::lheNode( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) : xmlNode(originFile, begin, childs){ //xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); //if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} @@ -2370,76 +2176,69 @@ namespace REX if( child->getName() == "event" ){ events.push_back( std::make_shared( *child ) ); continue; } } } - auto getHeader(){ return header; } - auto getInit(){ return init; } - auto& getEvents(){ return events; } - bool isModded() override{ return modded; } - bool isModded( bool deep ) override{ + std::shared_ptr lheNode::getHeader(){ return header; } + std::shared_ptr lheNode::getInit(){ return init; } + std::vector> lheNode::getEvents(){ return events; } + bool lheNode::isModded(){ return modded; } + bool lheNode::isModded( bool deep ){ if( !deep ){ return isModded(); } bool modStat = isModded(); for( auto child : children ){ modStat = ( modStat || child->isModded( deep ) ); } for( auto event : events ){ modStat = ( modStat || event->isModded( deep ) ); } return modStat; } - void setInit( std::shared_ptr initNod ){ init = initNod; } - void setHeader( std::shared_ptr headNod ){ header = headNod; } - void addWgt( size_t index, newWgt& addedWgt ){ + void lheNode::setInit( std::shared_ptr initNod ){ init = initNod; } + void lheNode::setHeader( std::shared_ptr headNod ){ header = headNod; } + void lheNode::addWgt( size_t index, newWgt& addedWgt ){ 
header->addWgt( index, addedWgt.getHeadWgt() ); auto wgtsVec = addedWgt.getBodyWgts(); for( size_t k = 0 ; k < wgtsVec.size() ; ++k ){ events[k]->addWgt( wgtsVec[k] ); } } - void addWgt( size_t index, newWgt& addedWgt, std::string& idTag ){ + void lheNode::addWgt( size_t index, newWgt& addedWgt, std::string& idTag ){ header->addWgt( index, addedWgt.getHeadWgt(), idTag ); auto wgtsVec = addedWgt.getBodyWgts(); for( size_t k = 0 ; k < wgtsVec.size() ; ++k ){ events[k]->addWgt( wgtsVec[k] ); } } - void setRelStats( std::vector& particles ){ + void lheNode::setRelStats( std::vector& particles ){ relStat = particles; } - std::vector& getRelStats(){ + std::vector& lheNode::getRelStats(){ return relStat; } - void setSameSort( sortFcn& sortF ){ + void lheNode::setSameSort( sortFcn& sortF ){ particleSort = sortF; } - sortFcn& getSameSort(){ + sortFcn& lheNode::getSameSort(){ return particleSort; } - void setStatSort( statSort& statS ){ + void lheNode::setStatSort( statSort& statS ){ statParticleSort = statS; } - statSort& getStatSort(){ + statSort& lheNode::getStatSort(){ return statParticleSort; } - protected: - std::vector> events = {}; - std::shared_ptr header = std::make_shared(xmlFile, start); - std::shared_ptr init = std::make_shared(xmlFile, start); - std::vector relStat = {"-1", "1"}; - sortFcn particleSort = []( std::vector prts ){ return stodSort(prts); }; - statSort statParticleSort = []( std::string_view dummy, std::vector prts ){ return stodSort(prts); }; - virtual void headerWriter(){ + void lheNode::headerWriter(){ nodeContent += "\n" + *header->nodeWriter(); } - virtual void initWriter(){ + void lheNode::initWriter(){ nodeContent += *init->nodeWriter(); } - virtual void eventWriter(){ + void lheNode::eventWriter(){ for( auto event : events ){ nodeContent += *event->nodeWriter(); } } - void contWriter() override{ + void lheNode::contWriter(){ nodeContent = ""; headerWriter(); initWriter(); eventWriter(); } - void fullWriter() override{ + void 
lheNode::fullWriter(){ if( isModded( true ) ){ headWriter(); contWriter(); @@ -2452,12 +2251,10 @@ namespace REX written = true; } } - public: - virtual std::shared_ptr nodeWriter() { + std::shared_ptr lheNode::nodeWriter() { if( isModded( true ) || !isWritten() ){ fullWriter(); } return writtenSelf; } - }; // ZW: function for extracting event information from // LHE files @@ -2783,20 +2580,18 @@ namespace REX return true; } - struct eventComp{ - bool operator()( event& firstEv, event& secEv){ + bool eventComp::operator()( event& firstEv, event& secEv){ if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getStatSort());} else {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getSortFcn() );} } - bool operator()( const event& firstEv, const event& secEv) const { + bool eventComp::operator()( const event& firstEv, const event& secEv) const { if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getStatSort());} else {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getSortFcn() );} } - bool operator()(event& firstEv, event& secEv, std::vector statVec){ + bool eventComp::operator()(event& firstEv, event& secEv, std::vector statVec){ if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, statVec, firstEv.getStatSort());} else {return evProcComp( firstEv, secEv, statVec, firstEv.getSortFcn() );} } - }; // ZW: fcn for checking whether a list of pdgXtract format // processes sourceProcList contains a given process newProc @@ -3158,16 +2953,7 @@ namespace REX } // ZW: transposed event information struct - struct evtInfo { - public: - std::vector wgts; - std::vector scales; - std::vector aQEDs; - std::vector aQCDs; - std::vector nprts; - std::vector relNPrts; - std::vector procIDs; - evtInfo( const std::vector>& lheFile = {} ){ + evtInfo::evtInfo( const std::vector>& lheFile ){ int nEvt = lheFile.size(); wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); 
nprts.reserve(nEvt); procIDs.reserve(nEvt); for( auto evt : lheFile ) @@ -3180,7 +2966,7 @@ namespace REX procIDs.push_back(evt->getHead().getProcID()); } } - evtInfo( const std::vector>& lheFile, const std::vector& statVec ){ + evtInfo::evtInfo( const std::vector>& lheFile, const std::vector& statVec ){ int nEvt = lheFile.size(); wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); for( auto evt : lheFile ) @@ -3195,7 +2981,7 @@ namespace REX procIDs.push_back(evt->getHead().getProcID()); } } - evtInfo( const std::vector>& lheFile, const std::vector& statVec, + evtInfo::evtInfo( const std::vector>& lheFile, const std::vector& statVec, sortFcn sorter ){ int nEvt = lheFile.size(); wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); @@ -3211,7 +2997,7 @@ namespace REX procIDs.push_back(evt->getHead().getProcID()); } } - evtInfo( const std::vector>& lheFile, const std::vector& statVec, + evtInfo::evtInfo( const std::vector>& lheFile, const std::vector& statVec, statSort sorter ){ int nEvt = lheFile.size(); wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); @@ -3227,20 +3013,9 @@ namespace REX procIDs.push_back(evt->getHead().getProcID()); } } - }; // ZW: transposed particle information struct - struct prtInfo { - public: - std::vector moms; - std::vector masses; - std::vector vtims; - std::vector spins; - std::vector statuses; - std::vector mothers; - std::vector icols; - std::vector pdgs; - prtInfo( const std::vector>& lheFile = {}, const int nPrt = 8 ){ + prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt ){ int nEvt = lheFile.size(); moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); 
icols.reserve(2*nPrt*nEvt); @@ -3264,7 +3039,7 @@ namespace REX } } } - prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ + prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ int nEvt = lheFile.size(); moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); @@ -3292,7 +3067,7 @@ namespace REX } } } - prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, sortFcn sorter ){ int nEvt = lheFile.size(); moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); @@ -3321,7 +3096,7 @@ namespace REX } } } - prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, statSort sorter ){ int nEvt = lheFile.size(); moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); @@ -3350,49 +3125,36 @@ namespace REX } } } - }; // ZW: transposed LHE file with a single process type - struct transMonoLHE { - public: - evtInfo evtsHead; - prtInfo evtsData; - std::shared_ptr process; - transMonoLHE( const std::vector>& lheFile = {}, const int nPrt = 8 ){ + transMonoLHE::transMonoLHE( const std::vector>& lheFile , const int nPrt ){ evtsHead = evtInfo(lheFile); evtsData = prtInfo(lheFile, nPrt); process = lheFile[0]; } - transMonoLHE( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ + transMonoLHE::transMonoLHE( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ evtsHead = evtInfo(lheFile, statVec); evtsData = prtInfo(lheFile, nPrt, statVec); process = lheFile[0]; } - transMonoLHE( 
const std::vector>& lheFile, const int nPrt, + transMonoLHE::transMonoLHE( const std::vector>& lheFile, const int nPrt, sortFcn sorter, - std::vector statVec = { "-1", "1" } ){ + std::vector statVec ){ evtsHead = evtInfo(lheFile, statVec); evtsData = prtInfo(lheFile, nPrt, statVec, sorter); process = lheFile[0]; } - transMonoLHE( const std::vector>& lheFile, const int nPrt, + transMonoLHE::transMonoLHE( const std::vector>& lheFile, const int nPrt, statSort sorter, - std::vector statVec = { "-1", "1" } ){ + std::vector statVec){ evtsHead = evtInfo(lheFile, statVec); evtsData = prtInfo(lheFile, nPrt, statVec, sorter); process = lheFile[0]; } - }; // ZW: transposed LHE file ordered by subprocess - struct transLHE { - public: - std::string_view xmlFile; - std::vector> subProcs; - std::vector> procSets; - std::vector>> relProcs; - transLHE(){ return; } - transLHE( lheNode& lheFile ) + transLHE::transLHE(){ return; } + transLHE::transLHE( lheNode& lheFile ) { procSets = evProcessPull( lheFile ); relProcs = evProcOrder( lheFile, procSets ); @@ -3404,9 +3166,9 @@ namespace REX subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt() ); } } - transLHE( lheNode& lheFile, + transLHE::transLHE( lheNode& lheFile, sortFcn sorter, - const std::vector& statVec = { "-1", "1" } ) + const std::vector& statVec ) { procSets = evProcessPull( lheFile, sorter, statVec ); relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); @@ -3418,9 +3180,9 @@ namespace REX subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), sorter, statVec ); } } - transLHE( lheNode& lheFile, + transLHE::transLHE( lheNode& lheFile, statSort sorter, - const std::vector& statVec = { "-1", "1" } ) + const std::vector& statVec) { procSets = evProcessPull( lheFile, sorter, statVec ); relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); @@ -3432,7 +3194,7 @@ namespace REX subProcs[k] = std::make_shared( *procsOrdered[k], 
procsOrdered[k]->at(0)->getNprt(), sorter, statVec ); } } - transLHE( lheNode& lheFile, const std::vector& statVec ) + transLHE::transLHE( lheNode& lheFile, const std::vector& statVec ) { procSets = evProcessPull( lheFile, statVec ); relProcs = evProcOrder( lheFile, procSets, statVec ); @@ -3444,15 +3206,15 @@ namespace REX subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), statVec ); } } - template - std::shared_ptr> vectorFlat( std::vector>> vecVec ) +// template + std::shared_ptr> transLHE::vectorFlat( std::vector>> vecVec ) { if( vecVec.size() != relProcs.size() ) throw std::range_error("vectorFlat: input vector size does not match number of subprocesses"); for( size_t k = 0 ; k < vecVec.size() ; ++k){ if( vecVec[k]->size() == relProcs[k]->size() ) continue; else throw std::range_error("vectorFlat: input vector size does not match number of events for subprocess"); } - auto flatVec = std::make_shared>(relProcs[0]->size()); + auto flatVec = std::make_shared>(relProcs[0]->size()); for( size_t k = 0 ; k < relProcs.size() ; ++k ){ size_t currInd = 0; for( size_t j = 0 ; j < relProcs[k]->size() ; ++j ){ @@ -3464,7 +3226,6 @@ namespace REX } return flatVec; } - }; // ZW: vector transformation string_to_double std::shared_ptr> vecStoD( const std::vector dataVec ) @@ -3500,53 +3261,21 @@ namespace REX // ZW: bool struct to define which double values // to extract transposed from LHE file - struct lheRetDs{ - public: - bool ebmup = false; - bool xsecup = false; - bool xerrup = false; - bool xmaxup = false; - bool xwgtup = false; - bool scalup = false; - bool aqedup = false; - bool aqcdup = false; - bool pup = true; - bool mass = false; - bool vtimup = false; - bool spinup = false; - std::vector getBools(){ + std::vector lheRetDs::getBools(){ return { ebmup, xsecup, xerrup, xmaxup, xwgtup, scalup, aqedup, aqcdup, pup, mass, vtimup, spinup }; } - }; // ZW: bool struct to define which int values // to extract transposed from LHE file - 
struct lheRetInts{ - public: - //bool maxpup = false; - bool idbmup = false; - bool pdfgup = false; - bool pdfsup = false; - bool idwtup = false; - bool nprup = false; - bool lprup = false; - //bool maxnup = false; - bool nup = true; - bool idprup = false; - bool idup = true; - bool istup = true; - bool mothup = false; - bool icolup = false; - std::vector getBools(){ + std::vector lheRetInts::getBools(){ return { idbmup, pdfgup, pdfsup, idwtup, nprup, lprup, nup, idprup, idup, istup, mothup, icolup }; } - }; // ZW: function for extracting transposed double values // from LHE file - std::shared_ptr>>> lheValDoubles( lheNode& lheFile, lheRetDs vals = lheRetDs() ) + std::shared_ptr>>> lheValDoubles( lheNode& lheFile, lheRetDs vals ) { // ZW: hard-setting returning g_S instead of a_S for now bool aStogS = true; @@ -3605,7 +3334,7 @@ namespace REX return lheDos; } - std::shared_ptr>>> lheValDoubles(transLHE& lheAOS, lheRetDs vals = lheRetDs() ) + std::shared_ptr>>> lheValDoubles(transLHE& lheAOS, lheRetDs vals ) { // ZW: hard-setting returning g_S instead of a_S for now bool aStogS = true; @@ -3960,4 +3689,4 @@ namespace REX } } -#endif \ No newline at end of file +#endif diff --git a/tools/REX/rwgt_driver.cc b/tools/REX/rwgt_driver.cc index 4fe4023730..7fa2ab2b5f 100644 --- a/tools/REX/rwgt_driver.cc +++ b/tools/REX/rwgt_driver.cc @@ -30,6 +30,7 @@ int usage( char* argv0, int ret = 1 ) int main( int argc, char** argv ){ + std::cout << "Starting reweighting driver...\n"; std::string lheFilePath; std::string rwgtCardPath; std::string outputPath; @@ -38,9 +39,8 @@ int main( int argc, char** argv ){ if (argc < 2){ return usage( argv[0] ); } - // READ COMMAND LINE ARGUMENTS - for( int i = 1; i <= argc; i++ ) + for( int i = 1; i < argc; i++ ) { auto currArg = std::string( argv[i] ); if( currArg.substr(0,9) == "--lhefile" || currArg.substr(0,4) == "-lhe" ) @@ -55,11 +55,12 @@ int main( int argc, char** argv ){ } else if (currArg.substr(0,12) == "--param_card" || 
currArg.substr(0,5) == "-slha" ){ slhaPath = currArg.substr( currArg.find( "=" ) + 1 ); } - { + else { return usage( argv[0] ); } } + if( lheFilePath.empty() || rwgtCardPath.empty() ){ return usage( argv[0] ); } @@ -76,12 +77,16 @@ int main( int argc, char** argv ){ if( onWindows ){ if( currPath.substr( currPath.find_last_of("\\", slashPos - 1) + 1, 2 ) == "P1" ){ slhaPath = "..\\..\\Cards\\param_card.dat"; + } else if( currPath.substr( currPath.find_last_of("\\", slashPos - 1) + 1, 3 ) == "Sub" ){ + slhaPath = "..\\Cards\\param_card.dat"; } else{ slhaPath = "\\Cards\\param_card.dat"; } } else { if( currPath.substr( currPath.find_last_of("/", slashPos - 1) + 1, 2 ) == "P1" ){ slhaPath = "../../Cards/param_card.dat"; + } else if( currPath.substr( currPath.find_last_of("/", slashPos - 1) + 1, 3 ) == "Sub" ) { + slhaPath = "../Cards/param_card.dat"; } else { slhaPath = "/Cards/param_card.dat"; } @@ -98,7 +103,7 @@ int main( int argc, char** argv ){ REX::teaw::ampCall subProcSet; for( auto proc : runSet ){ - subProcSet.insert( REX::teaw::ampPair( proc.procEvent, proc.bridgeCall ) ); + subProcSet.insert( REX::teaw::ampPair( proc.procEventInt, proc.bridgeCall ) ); } //auto bridgeCont = fbridgeRunner( fileCol.getLhe() ); diff --git a/tools/REX/rwgt_instance.cc b/tools/REX/rwgt_instance.cc new file mode 100644 index 0000000000..a927754625 --- /dev/null +++ b/tools/REX/rwgt_instance.cc @@ -0,0 +1,78 @@ +//========================================================================== +// Copyright (C) 2023-2024 CERN +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Written by: Z. Wettersten (Jan 2024) for the MG5aMC CUDACPP plugin. +//========================================================================== +//========================================================================== +// Library including generic functions and classes for event reweighting. 
+// Process-specific rwgt_runner files are generated by mg5amc@nlo and use +// this library, while the rwgt_driver file is a wrapping program that +// calls the process-specific runners for given subprocesses. +//========================================================================== + +#ifndef _RWGT_INSTANCE_CC_ +#define _RWGT_INSTANCE_CC_ + +#include "rwgt_instance.h" + +namespace rwgt{ + + + //ZW: Function for calculating the number of remaining events in a warp + // in order to pad the input arrays to a multiple of the warp size + unsigned int warpRemain( unsigned int nEvt, unsigned int nWarp ){ + return (nWarp - ( nEvt % nWarp )) % nWarp; + } + + //ZW: Function for padding the input arrays to a multiple of the warp size + template + std::shared_ptr> warpPad( std::vector& input, unsigned int nWarp = 32 ){ + auto nEvt = input->size(); + auto nWarpRemain = warpRemain( nEvt, nWarp ); + auto fauxNEvt = nEvt + nWarpRemain; + auto output = std::make_shared>( fauxNEvt ); + std::copy( input.begin(), input.end(), output->begin()); + return output; + } + + instance::instance(){} + instance::instance( std::vector>& event){ + this->procEventInt = event; + this->process = REX::event( event ); + } + instance::instance( std::vector>& event, REX::teaw::amplitude& amp ){ + this->procEventInt = event; + this->process = REX::event( event ); + bridgeCall = amp; + } + void instance::setProc( std::vector>& event ){ + this->procEventInt = event; + this->process = REX::event( event ); + } + instance::instance( std::vector>& event){ + this->procEventStr = event; + this->process = REX::event( event ); + } + instance::instance( std::vector>& event, REX::teaw::amplitude& amp ){ + this->procEventStr = event; + this->process = REX::event( event ); + bridgeCall = amp; + } + void instance::setProc( std::vector>& event ){ + this->procEventStr = event; + this->process = REX::event( event ); + } + void instance::setAmp( REX::teaw::amplitude& amp ){ + bridgeCall = amp; + } + std::shared_ptr> 
instance::ampEval( std::vector& momenta, std::vector& alphaS ){ + return bridgeCall( momenta, alphaS ); + } + std::shared_ptr> instance::ampEval( std::shared_ptr> momenta, + std::shared_ptr> alphaS ){ + return bridgeCall( *momenta, *alphaS ); + } + +} + +#endif diff --git a/tools/REX/rwgt_instance.h b/tools/REX/rwgt_instance.h index e87219b001..376635933e 100644 --- a/tools/REX/rwgt_instance.h +++ b/tools/REX/rwgt_instance.h @@ -13,7 +13,7 @@ #ifndef _RWGT_INSTANCE_H_ #define _RWGT_INSTANCE_H_ -#include "teawREX.hpp" +#include "teawREX.h" namespace rwgt{ @@ -21,49 +21,24 @@ namespace rwgt{ //ZW: Function for calculating the number of remaining events in a warp // in order to pad the input arrays to a multiple of the warp size - unsigned int warpRemain( unsigned int nEvt, unsigned int nWarp = 32 ){ - return (nWarp - ( nEvt % nWarp )) % nWarp; - } - - //ZW: Function for padding the input arrays to a multiple of the warp size - template - std::shared_ptr> warpPad( std::vector& input, unsigned int nWarp = 32 ){ - auto nEvt = input->size(); - auto nWarpRemain = warpRemain( nEvt, nWarp ); - auto fauxNEvt = nEvt + nWarpRemain; - auto output = std::make_shared>( fauxNEvt ); - std::copy( input.begin(), input.end(), output->begin()); - return output; - } + unsigned int warpRemain( unsigned int nEvt, unsigned int nWarp = 32 ); struct instance{ - std::vector> procEvent; + std::vector> procEventInt; + std::vector> procEventStr; REX::event process; REX::teaw::amplitude bridgeCall; - instance(){} - instance( std::vector>& event){ - this->procEvent = event; - this->process = REX::event( event ); - } - instance( std::vector>& event, REX::teaw::amplitude& amp ){ - this->procEvent = event; - this->process = REX::event( event ); - bridgeCall = amp; - } - void setProc( std::vector>& event ){ - this->procEvent = event; - this->process = REX::event( event ); - } - void setAmp( REX::teaw::amplitude& amp ){ - bridgeCall = amp; - } - std::shared_ptr> ampEval( std::vector& momenta, 
std::vector& alphaS ){ - return bridgeCall( momenta, alphaS ); - } + instance(); + instance( std::vector>& event); + instance( std::vector>& event, REX::teaw::amplitude& amp ); + void setProc( std::vector>& event ); + instance( std::vector>& event); + instance( std::vector>& event, REX::teaw::amplitude& amp ); + void setProc( std::vector>& event ); + void setAmp( REX::teaw::amplitude& amp ); + std::shared_ptr> ampEval( std::vector& momenta, std::vector& alphaS ); std::shared_ptr> ampEval( std::shared_ptr> momenta, - std::shared_ptr> alphaS ){ - return bridgeCall( *momenta, *alphaS ); - } + std::shared_ptr> alphaS ); }; } diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index 14d2dfdc79..51be5e7ec7 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -12,7 +12,6 @@ %(process_lines)s //-------------------------------------------------------------------------- -#include "teawREX.hpp" #include "rwgt_instance.h" #include "fbridge.cc" @@ -65,7 +64,7 @@ namespace %(process_namespace)s{ } std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ runnerSetup( alphaS ); - for( size_t k = 0 ; k < nWarpRemain ; ++k ){ + for( size_t j = 0 ; j < nWarpRemain ; ++j ){ alphaS.push_back( 0. ); for( size_t k = 0 ; k < nMom * nPar ; ++k ){ momenta.push_back( 0. 
); @@ -113,7 +112,7 @@ namespace %(process_namespace)s{ // auto procEvent = REX::event( procEvent ); // REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; - std::vector> eventVec = {%(process_event)s}; + std::vector> eventVec = {%(process_event)s}; REX::event locEv = REX::event( eventVec ); fbridgeRunner fBridge = fbridgeRunner( locEv ); @@ -124,7 +123,6 @@ namespace %(process_namespace)s{ REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; auto runner = rwgt::instance(eventVec, scatteringAmp); - //auto thisProc = runner.getProc( scatteringAmp ); auto thisProc = runner.process.getProc( currProcSort ); // ZW: SET UP WRAPPER FOR FORTRAN_BRIDGE diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc new file mode 120000 index 0000000000..f9640c2fcb --- /dev/null +++ b/tools/REX/teawREX.cc @@ -0,0 +1 @@ +teawREX.hpp \ No newline at end of file diff --git a/tools/REX/teawREX.h b/tools/REX/teawREX.h new file mode 100644 index 0000000000..a865db4944 --- /dev/null +++ b/tools/REX/teawREX.h @@ -0,0 +1,188 @@ +/*** + * _ ______ _______ __ + * | | | ___ \ ___\ \ / / + * | |_ ___ __ ___ _| |_/ / |__ \ V / + * | __/ _ \/ _` \ \ /\ / / /| __| / \ + * | || __/ (_| |\ V V /| |\ \| |___/ /^\ \ + * \__\___|\__,_| \_/\_/ \_| \_\____/\/ \/ + * + ***/ + +// THIS IS NOT A LICENSED RELEASE +// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD +// FROM AN IMPROPER RELEASE. + +// Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. +// All rights reserved. 
+ +#ifndef _TEAWREX_H_ +#define _TEAWREX_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "REX.h" + +namespace REX::teaw +{ + + using amplitude = std::function>(std::vector&, std::vector&)>; + using ampCall = std::map; + using ampPair = std::pair; + using vecMap = std::map>, REX::eventComp>; + + struct rwgtVal : REX::paramVal{ + public: + std::string_view blockName; + bool allStat; + bool isAll(); + rwgtVal(); + rwgtVal( std::string_view paramLine ); + std::string_view getLine(); + void outWrite( REX::paramBlock& srcBlock ); + }; + + struct rwgtBlock { + public: + std::string_view name; + std::vector rwgtVals; + rwgtBlock( std::vector values = {}, std::string_view title = "" ); + rwgtBlock( const std::vector& vals, std::string_view title = "" ); + std::string_view getBlock(); + void outWrite( REX::paramBlock& srcBlock, const std::map& blocks ); + protected: + std::string runBlock; + bool written = false; + }; + + struct rwgtProc { + public: + std::vector rwgtParams; + std::string_view procString; + std::string_view rwgtName; + std::vector rwgtOpts; + void parse(); + rwgtProc( REX::lesHouchesCard slhaSet, std::string_view rwgtSet = "", bool parseOnline = false ); + std::shared_ptr outWrite( const REX::lesHouchesCard& paramOrig ); + std::string_view comRunProc(); + }; + + struct rwgtCard{ + public: + REX::lesHouchesCard slhaCard; + std::vector rwgtRuns; + std::vector rwgtProcs; + std::vector opts; + std::vector rwgtNames; + std::string_view srcCard; + void parse( bool parseOnline = false ); + rwgtCard( std::string_view reweight_card ); + rwgtCard( std::string_view reweight_card, REX::lesHouchesCard slhaParams, bool parseOnline = false ); + std::vector> writeCards( REX::lesHouchesCard& slhaOrig ); + }; + + + struct rwgtCollection { + public: + void setRwgt( std::shared_ptr rwgts ); + void setRwgt( rwgtCard rwgts ); + void setSlha( std::shared_ptr slha ); + void setSlha( REX::lesHouchesCard slha ); + void setLhe( std::shared_ptr lhe 
); + void setLhe( REX::lheNode& lhe ); + void setLhe( std::string_view lhe_file ); + std::shared_ptr getRwgt(); + std::shared_ptr getSlha(); + std::shared_ptr getLhe(); + rwgtCollection(); + rwgtCollection( std::shared_ptr lhe, std::shared_ptr slha, std::shared_ptr rwgts ); + protected: + template + void setDoubles(Args&&... args); + std::shared_ptr rwgtSets; + std::shared_ptr slhaParameters; + std::shared_ptr lheFile; + std::vector>> wgts; + std::vector>> gS; + std::vector>> momenta; + bool lheFileSet = false; + bool slhaSet = false; + bool rwgtSet = false; + REX::transLHE eventFile; + }; + + struct rwgtFiles : rwgtCollection { + void setRwgtPath( std::string_view path ); + void setSlhaPath( std::string_view path ); + void setLhePath( std::string_view path ); + rwgtFiles(); + rwgtFiles( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ); + template + void initCards(Args&&... args); + template + void initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, Args&&... 
args ); + protected: + void pullRwgt(); + void pullSlha(); + void pullLhe(); + std::string rwgtPath; + std::string lhePath; + std::string slhaPath; + std::shared_ptr lheCard; + std::shared_ptr slhaCard; + std::shared_ptr rewgtCard; + }; + + struct rwgtRunner : rwgtFiles{ + public: + void setMeEval( amplitude eval ); + void setMeEvals( ampCall evals ); + void addMeEval( const REX::event& ev, const amplitude& eval ); + rwgtRunner(); + rwgtRunner( rwgtFiles& rwgts ); + rwgtRunner( rwgtFiles& rwgts, amplitude meCalc ); + rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ); + rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + amplitude meCalc ); + rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + ampCall meCalcs ); + bool oneME(); + bool singAmp(); + protected: + bool meInit = false; + bool meCompInit = false; + bool meSet = false; + bool normWgtSet = false; + amplitude meEval; + ampCall meEvals; + std::vector>> initMEs; + std::vector>> meNormWgts; + std::shared_ptr> normWgt; + std::shared_ptr rwgtGroup; + template + void setMEs(Args&&... args); + bool setParamCard( std::shared_ptr slhaParams ); + void setNormWgtsSingleME(); + void setNormWgtsMultiME(); + template + void setNormWgts(Args&&... 
args); + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId ); + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, std::string& id ); + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, REX::event& ev ); + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, + std::string& id, REX::event& ev ); + bool lheFileWriter( std::shared_ptr lheFile, std::string outputDir = "rwgt_evts.lhe" ); + public: + void runRwgt( const std::string& output ); + }; + + + void rwgtRun( rwgtRunner& rwgt, const std::string& path ); + +} + +#endif \ No newline at end of file diff --git a/tools/REX/teawREX.hpp b/tools/REX/teawREX.hpp index 971b563f82..ae9efff776 100644 --- a/tools/REX/teawREX.hpp +++ b/tools/REX/teawREX.hpp @@ -15,8 +15,8 @@ // Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. // All rights reserved. -#ifndef _TEAWREX_HPP_ -#define _TEAWREX_HPP_ +#ifndef _TEAWREX_CC_ +#define _TEAWREX_CC_ #include #include @@ -25,16 +25,12 @@ #include #include #include -#include "REX.hpp" +#include "REX.cc" +#include "teawREX.h" namespace REX::teaw { - using amplitude = std::function>(std::vector&, std::vector&)>; - using ampCall = std::map; - using ampPair = std::pair; - using vecMap = std::map>, REX::eventComp>; - template std::shared_ptr> scatAmpEval(std::vector& momenta, std::function>(std::vector&)> evalFunc) { return evalFunc(momenta); } @@ -51,13 +47,8 @@ namespace REX::teaw std::shared_ptr> scatAmpEval(std::vector& momenta, std::function(std::vector&, std::vector&)> evalFunc) { return evalFunc(momenta); } - struct rwgtVal : REX::paramVal{ - public: - std::string_view blockName; - bool allStat; - bool isAll(){ return (idStr == "all"); } - rwgtVal() : paramVal(){ return; } - rwgtVal( std::string_view paramLine ) + rwgtVal::rwgtVal() : paramVal(){ return; } + rwgtVal::rwgtVal( std::string_view paramLine ) : paramVal( paramLine, 
false ){if( paramLine.size() == 0 ){ return; } realLine = paramLine; auto vals = *REX::nuBlankSplitter( realLine ); @@ -65,8 +56,9 @@ namespace REX::teaw idStr = vals[2]; valStr = vals[3]; } - std::string_view getLine(){ return realLine; } - void outWrite( REX::paramBlock& srcBlock ){ + std::string_view rwgtVal::getLine(){ return realLine; } + bool rwgtVal::isAll(){ return (idStr == "all"); } + void rwgtVal::outWrite( REX::paramBlock& srcBlock ){ if ( isAll() ) { for( auto param : srcBlock.params ) @@ -89,13 +81,8 @@ namespace REX::teaw srcBlock.modded = true; return; } - }; - struct rwgtBlock { - public: - std::string_view name; - std::vector rwgtVals; - rwgtBlock( std::vector values = {}, std::string_view title = "" ) + rwgtBlock::rwgtBlock( std::vector values, std::string_view title) { name = title; rwgtVals.resize( values.size() ); @@ -104,12 +91,12 @@ namespace REX::teaw rwgtVals[k] = rwgtVal( values[k] ); } } - rwgtBlock( const std::vector& vals, std::string_view title = "" ) + rwgtBlock::rwgtBlock( const std::vector& vals, std::string_view title ) { name = title; rwgtVals = vals; } - std::string_view getBlock(){ + std::string_view rwgtBlock::getBlock(){ if( written ){ return runBlock; } runBlock = ""; for( auto val : rwgtVals ){ @@ -118,7 +105,7 @@ namespace REX::teaw written = true; return runBlock; } - void outWrite( REX::paramBlock& srcBlock, const std::map& blocks ) + void rwgtBlock::outWrite( REX::paramBlock& srcBlock, const std::map& blocks ) { for( auto parm : rwgtVals ) { @@ -127,18 +114,8 @@ namespace REX::teaw srcBlock.modded = true; return; } - protected: - std::string runBlock; - bool written = false; - }; - struct rwgtProc { - public: - std::vector rwgtParams; - std::string_view procString; - std::string_view rwgtName; - std::vector rwgtOpts; - void parse(){ + void rwgtProc::parse(){ std::vector blocks; std::vector>> params; auto procLines = *REX::nuLineSplitter( procString ); @@ -162,7 +139,7 @@ namespace REX::teaw rwgtParams.push_back( 
rwgtBlock( *params[k], blocks[k] ) ); } } - rwgtProc( REX::lesHouchesCard slhaSet, std::string_view rwgtSet = "", bool parseOnline = false ) + rwgtProc::rwgtProc( REX::lesHouchesCard slhaSet, std::string_view rwgtSet, bool parseOnline ) { if( rwgtSet == "" ){ return; } auto strtLi = rwgtSet.find( "\n", rwgtSet.find("launch") ) + 1; @@ -172,7 +149,7 @@ namespace REX::teaw procString = rwgtSet.substr( strtLi, endLi - strtLi ); if( parseOnline ){ parse(); } } - std::shared_ptr outWrite( const REX::lesHouchesCard& paramOrig ){ + std::shared_ptr rwgtProc::outWrite( const REX::lesHouchesCard& paramOrig ){ auto slhaOrig = std::make_shared( paramOrig ); std::map blockIds; for( size_t k = 0 ; k < slhaOrig->blocks.size() ; ++k ) @@ -184,18 +161,9 @@ namespace REX::teaw slhaOrig->modded = true; return slhaOrig; } - std::string_view comRunProc(){ return procString; } - }; + std::string_view rwgtProc::comRunProc(){ return procString; } - struct rwgtCard{ - public: - REX::lesHouchesCard slhaCard; - std::vector rwgtRuns; - std::vector rwgtProcs; - std::vector opts; - std::vector rwgtNames; - std::string_view srcCard; - void parse( bool parseOnline = false ) { + void rwgtCard::parse( bool parseOnline ) { auto strt = srcCard.find("launch"); while( auto commPos = srcCard.find_last_of("#", strt) > srcCard.find_last_of("\n", strt) ){ if( commPos == REX::npos ){ @@ -252,15 +220,15 @@ namespace REX::teaw } } } - rwgtCard( std::string_view reweight_card ){ + rwgtCard::rwgtCard( std::string_view reweight_card ){ srcCard = reweight_card; } - rwgtCard( std::string_view reweight_card, REX::lesHouchesCard slhaParams, bool parseOnline = false ){ + rwgtCard::rwgtCard( std::string_view reweight_card, REX::lesHouchesCard slhaParams, bool parseOnline ){ srcCard = reweight_card; slhaCard = slhaParams; if( parseOnline ){ parse( parseOnline ); } } - std::vector> writeCards( REX::lesHouchesCard& slhaOrig ){ + std::vector> rwgtCard::writeCards( REX::lesHouchesCard& slhaOrig ){ std::vector> cardVec; 
slhaOrig.parse(); cardVec.reserve( rwgtRuns.size() ); @@ -270,58 +238,54 @@ namespace REX::teaw } return cardVec; } - }; - struct rwgtCollection { - public: - void setRwgt( std::shared_ptr rwgts ){ + void rwgtCollection::setRwgt( std::shared_ptr rwgts ){ if( rwgtSet ){ return; } rwgtSets = rwgts; rwgtSet = true; } - void setRwgt( rwgtCard rwgts ){ + void rwgtCollection::setRwgt( rwgtCard rwgts ){ if( rwgtSet ){ return; } setRwgt( std::make_shared( rwgts ) ); rwgtSet = true; } - void setSlha( std::shared_ptr slha ){ + void rwgtCollection::setSlha( std::shared_ptr slha ){ if( slhaSet ){ return; } slhaParameters = slha; slhaParameters->parse(); slhaSet = true; } - void setSlha( REX::lesHouchesCard slha ){ + void rwgtCollection::setSlha( REX::lesHouchesCard slha ){ if( slhaSet ){ return; } setSlha( std::make_shared( slha ) ); slhaSet = true; } - void setLhe( std::shared_ptr lhe ){ + void rwgtCollection::setLhe( std::shared_ptr lhe ){ if( lheFileSet ){ return; } lheFile = lhe; lheFileSet = true; } - void setLhe( REX::lheNode& lhe ){ + void rwgtCollection::setLhe( REX::lheNode& lhe ){ if( lheFileSet ){ return; } setLhe( std::make_shared( lhe ) ); lheFileSet = true; } - void setLhe( std::string_view lhe_file ){ - if( lheFileSet ){ return; } + void rwgtCollection::setLhe( std::string_view lhe_file ){std::cout << "line 272\n"; + if( lheFileSet ){ return; } std::cout << "line 273\n"; //lheFile = REX::lheParser( lhe_file, strt, post ); - lheFile = std::make_shared( *lheFile ); - lheFileSet = true; - } - std::shared_ptr getRwgt(){ return rwgtSets; } - std::shared_ptr getSlha(){ return slhaParameters; } - std::shared_ptr getLhe(){ return lheFile; } - rwgtCollection(){ return; } - rwgtCollection( std::shared_ptr lhe, std::shared_ptr slha, std::shared_ptr rwgts ){ + lheFile = std::make_shared( REX::lheNode(lhe_file) ); std::cout << "line 275\n"; + lheFileSet = true; std::cout << "line 276\n"; + } + std::shared_ptr rwgtCollection::getRwgt(){ return rwgtSets; } + std::shared_ptr 
rwgtCollection::getSlha(){ return slhaParameters; } + std::shared_ptr rwgtCollection::getLhe(){ return lheFile; } + rwgtCollection::rwgtCollection(){ return; } + rwgtCollection::rwgtCollection( std::shared_ptr lhe, std::shared_ptr slha, std::shared_ptr rwgts ){ setLhe( lhe ); setSlha( slha ); setRwgt( rwgts ); } - protected: template - void setDoubles(Args&&... args){ + void rwgtCollection::setDoubles(Args&&... args){ if( lheFile == nullptr || rwgtSets == nullptr || slhaParameters == nullptr ) throw std::runtime_error( "One or more of the necessary files (SLHA parameter card, LHE event storage file, and MadGraph-format reweight card) have not been initialised." ); REX::lheRetDs returnBools; returnBools.xwgtup = true; returnBools.aqcdup = true; returnBools.pup = true; @@ -337,30 +301,18 @@ namespace REX::teaw momenta.push_back( vecOfVecs->at( 3*k + 2 ) ); } } - std::shared_ptr rwgtSets; - std::shared_ptr slhaParameters; - std::shared_ptr lheFile; - std::vector>> wgts; - std::vector>> gS; - std::vector>> momenta; - bool lheFileSet = false; - bool slhaSet = false; - bool rwgtSet = false; - REX::transLHE eventFile; - }; - struct rwgtFiles : rwgtCollection { - void setRwgtPath( std::string_view path ){ rwgtPath = path; } - void setSlhaPath( std::string_view path ){ slhaPath = path; } - void setLhePath( std::string_view path ){ lhePath = path; } - rwgtFiles() : rwgtCollection(){ return; } - rwgtFiles( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ) : rwgtCollection(){ + void rwgtFiles::setRwgtPath( std::string_view path ){ rwgtPath = path; } + void rwgtFiles::setSlhaPath( std::string_view path ){ slhaPath = path; } + void rwgtFiles::setLhePath( std::string_view path ){ lhePath = path; } + rwgtFiles::rwgtFiles() : rwgtCollection(){ return; } + rwgtFiles::rwgtFiles( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ) : rwgtCollection(){ setRwgtPath( reweight_card ); setSlhaPath( slha_card ); 
setLhePath( lhe_card ); } template - void initCards(Args&&... args){ + void rwgtFiles::initCards(Args&&... args){ if( rwgtPath == "" || slhaPath == "" || lhePath == "" ) throw std::runtime_error( "Paths to reweight card, parameter card, or LHE file have not been set" ); pullRwgt(); pullSlha(); pullLhe(); @@ -370,75 +322,55 @@ namespace REX::teaw setDoubles(args...); } template - void initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, Args&&... args ){ + void rwgtFiles::initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, Args&&... args ){ setLhePath( lhe_card ); setSlhaPath( slha_card ); setRwgtPath( reweight_card ); initCards(args...); } - protected: - void pullRwgt(){ + void rwgtFiles::pullRwgt(){ rewgtCard = REX::filePuller( rwgtPath ); } - void pullSlha(){ + void rwgtFiles::pullSlha(){ slhaCard = REX::filePuller( slhaPath ); } - void pullLhe(){ + void rwgtFiles::pullLhe(){ lheCard = REX::filePuller( lhePath ); + std::cout << *lheCard << "\n"; } - std::string rwgtPath; - std::string lhePath; - std::string slhaPath; - std::shared_ptr lheCard; - std::shared_ptr slhaCard; - std::shared_ptr rewgtCard; - }; - struct rwgtRunner : rwgtFiles{ - public: - void setMeEval( amplitude eval ){ + void rwgtRunner::setMeEval( amplitude eval ){ meEval = eval; meInit = true; ampCall nuEvals; nuEvals.insert( std::pair( *eventFile.subProcs[0]->process, eval ) ); meEvals = nuEvals; } - void setMeEvals( ampCall evals ){ meEvals = evals; meCompInit = true; } - void addMeEval( const REX::event& ev, const amplitude& eval ){ meEvals.insert( std::pair( ev, eval ) ); meCompInit = true; } - rwgtRunner() : rwgtFiles(){ return; } - rwgtRunner( rwgtFiles& rwgts ) : rwgtFiles( rwgts ){ return; } - rwgtRunner( rwgtFiles& rwgts, amplitude meCalc ) : rwgtFiles( rwgts ){ + void rwgtRunner::setMeEvals( ampCall evals ){ meEvals = evals; meCompInit = true; } + void rwgtRunner::addMeEval( const REX::event& ev, 
const amplitude& eval ){ meEvals.insert( std::pair( ev, eval ) ); meCompInit = true; } + rwgtRunner::rwgtRunner() : rwgtFiles(){ return; } + rwgtRunner::rwgtRunner( rwgtFiles& rwgts ) : rwgtFiles( rwgts ){ return; } + rwgtRunner::rwgtRunner( rwgtFiles& rwgts, amplitude meCalc ) : rwgtFiles( rwgts ){ meEval = meCalc; meInit = true; } - rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ) : rwgtFiles( rwgts ){ + rwgtRunner::rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ) : rwgtFiles( rwgts ){ meEvals = meCalcs; meCompInit = true; } - rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + rwgtRunner::rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, amplitude meCalc ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ meEval = meCalc; meInit = true; } - rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + rwgtRunner::rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, ampCall meCalcs ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ meEvals = meCalcs; meCompInit = true; } - bool oneME(){ return (meInit != meCompInit); } - bool singAmp(){ return (meInit && !meCompInit); } - protected: - bool meInit = false; - bool meCompInit = false; - bool meSet = false; - bool normWgtSet = false; - amplitude meEval; - ampCall meEvals; - std::vector>> initMEs; - std::vector>> meNormWgts; - std::shared_ptr> normWgt; - std::shared_ptr rwgtGroup; + bool rwgtRunner::oneME(){ return (meInit != meCompInit); } + bool rwgtRunner::singAmp(){ return (meInit && !meCompInit); } template - void setMEs(Args&&... args){ + void rwgtRunner::setMEs(Args&&... args){ initCards(args...); if( !oneME() ) throw std::runtime_error( "No or multiple function(s) for evaluating scattering amplitudes has been provided." 
); @@ -453,7 +385,7 @@ namespace REX::teaw //initMEs = {std::make_shared>( ins->begin(), ins->begin() + wgts[0]->size() )}; meSet = true; } - bool setParamCard( std::shared_ptr slhaParams ){ + bool rwgtRunner::setParamCard( std::shared_ptr slhaParams ){ if( slhaPath == "" ) throw std::runtime_error( "No parameter card path has been provided." ); if( slhaParameters == nullptr ) @@ -462,7 +394,7 @@ namespace REX::teaw throw std::runtime_error( "Failed to overwrite parameter card." ); return true; } - void setNormWgtsSingleME(){ + void rwgtRunner::setNormWgtsSingleME(){ //if( initMEs->size() != wgts[0]->size() ) // throw std::runtime_error( "Inconsistent number of events and event weights." ); meNormWgts = {std::make_shared>( wgts[0]->size() )}; @@ -471,7 +403,7 @@ namespace REX::teaw } normWgt = meNormWgts[0]; } - void setNormWgtsMultiME(){ + void rwgtRunner::setNormWgtsMultiME(){ meNormWgts = std::vector>>( initMEs.size() ); for( auto k = 0 ; k < wgts.size() ; ++k ){ meNormWgts[k] = std::make_shared>( wgts[k]->size() ); @@ -482,7 +414,7 @@ namespace REX::teaw normWgt = eventFile.vectorFlat( meNormWgts ); } template - void setNormWgts(Args&&... args){ + void rwgtRunner::setNormWgts(Args&&... args){ if( !oneME() ){ setMEs(args...); } //if( initMEs->size() != wgts[0]->size() ) // throw std::runtime_error( "Inconsistent number of events and event weights." ); @@ -494,7 +426,7 @@ namespace REX::teaw else { setNormWgtsMultiME(); } normWgtSet = true; } - bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId ){ + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId ){ if( !normWgtSet ) throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." 
); if( !setParamCard( slhaParams ) ) @@ -518,7 +450,7 @@ namespace REX::teaw lheFile->addWgt( 0, nuWgt ); return true; } - bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, std::string& id ){ + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, std::string& id ){ if( !normWgtSet ) throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); if( !setParamCard( slhaParams ) ) @@ -542,7 +474,7 @@ namespace REX::teaw lheFile->addWgt( 0, nuWgt ); return true; } - bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, REX::event& ev ){ + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, REX::event& ev ){ if( !normWgtSet ) throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); if( !setParamCard( slhaParams ) ) @@ -567,7 +499,7 @@ namespace REX::teaw lheFile->addWgt( 0, nuWgt ); return true; } - bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, std::string& id, REX::event& ev ){ if( !normWgtSet ) throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); @@ -592,14 +524,13 @@ namespace REX::teaw lheFile->addWgt( 0, nuWgt ); return true; } - bool lheFileWriter( std::shared_ptr lheFile, std::string outputDir = "rwgt_evts.lhe" ){ + bool rwgtRunner::lheFileWriter( std::shared_ptr lheFile, std::string outputDir ){ bool writeSuccess = REX::filePusher( outputDir, *lheFile->nodeWriter() ); if( !writeSuccess ) throw std::runtime_error( "Failed to write LHE file." 
); return true; } - public: - void runRwgt( const std::string& output ){ + void rwgtRunner::runRwgt( const std::string& output ){ setMEs(); setNormWgts(); rwgtGroup = std::make_shared(); @@ -613,7 +544,10 @@ namespace REX::teaw REX::filePusher( slhaPath, *slhaCard ); std::cout << "\nReweighting done.\n"; } - }; + + void rwgtRun( rwgtRunner& rwgt, const std::string& path ){ + rwgt.runRwgt( path ); + } } -#endif \ No newline at end of file +#endif diff --git a/tools/REX/tester.cpp b/tools/REX/tester.cpp index d7d8493c25..9a795d1a7e 100644 --- a/tools/REX/tester.cpp +++ b/tools/REX/tester.cpp @@ -19,15 +19,26 @@ std::shared_ptr> sorterFunc(std::string_view dummy, std::vec int main( int argc, char* argv[] ){ std::string lheFilePath; + std::string rwgtCardPath; + std::string outputPath; + std::string slhaPath; // READ COMMAND LINE ARGUMENTS - for( int arg = 0; arg < argc; arg++ ) +for( int i = 1; i < argc; i++ ) { - auto currArg = std::string( argv[arg] ); + auto currArg = std::string( argv[i] ); if( currArg.substr(0,9) == "--lhefile" || currArg.substr(0,4) == "-lhe" ) { lheFilePath = currArg.substr( currArg.find( "=" ) + 1 ); } + else if( currArg.substr(0,10) == "--rwgtcard" || currArg.substr(0,5) == "-rwgt" ) + { + rwgtCardPath = currArg.substr( currArg.find( "=" ) + 1 ); + } else if( currArg.substr(0,8) == "--output" || currArg.substr(0,4) == "-out" ){ + outputPath = currArg.substr( currArg.find( "=" ) + 1 ); + } else if (currArg.substr(0,12) == "--param_card" || currArg.substr(0,5) == "-slha" ){ + slhaPath = currArg.substr( currArg.find( "=" ) + 1 ); + } } @@ -57,6 +68,10 @@ int main( int argc, char* argv[] ){ } std::cout << evsVals->size() << "\n"; std::cout << siz << "\n"; + + REX::teaw::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); + fileCol.initCards(); + return 0; } \ No newline at end of file diff --git a/tools/REX/unweighted_events.lhe b/tools/REX/unweighted_events.lhe deleted file mode 100644 index 6b05b56584..0000000000 --- 
a/tools/REX/unweighted_events.lhe +++ /dev/null @@ -1,1870 +0,0 @@ - -
- - -3.5.2 - - - 3j -output -]]> - - -#********************************************************************* -# MadGraph/MadEvent * -# http://madgraph.hep.uiuc.edu * -# * -# proc_card.dat * -#********************************************************************* -# * -# This Files is generated by MADGRAPH 5 * -# * -# WARNING: This Files is generated for MADEVENT (compatibility issue)* -# This files is NOT a valid MG4 proc_card.dat * -# Running this in MG4 will NEVER reproduce the result of MG5* -# * -#********************************************************************* -#********************************************************************* -# Process(es) requested : mg2 input * -#********************************************************************* -# Begin PROCESS # This is TAG. Do not modify this line -p p > 3j #Process -# Be carefull the coupling are here in MG5 convention - -end_coup # End the couplings input - -done # this tells MG there are no more procs -# End PROCESS # This is TAG. Do not modify this line -#********************************************************************* -# Model information * -#********************************************************************* -# Begin MODEL # This is TAG. Do not modify this line -sm -# End MODEL # This is TAG. Do not modify this line -#********************************************************************* -# Start multiparticle definitions * -#********************************************************************* -# Begin MULTIPARTICLES # This is TAG. Do not modify this line - -# End MULTIPARTICLES # This is TAG. 
Do not modify this line - - - - - -###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 #### -###################################################################### -################################### -## INFORMATION FOR MASS -################################### -BLOCK MASS # - 5 4.700000e+00 # mb - 6 1.730000e+02 # mt - 15 1.777000e+00 # mta - 23 9.118800e+01 # mz - 25 1.250000e+02 # mh - 1 0.000000e+00 # d : 0.0 - 2 0.000000e+00 # u : 0.0 - 3 0.000000e+00 # s : 0.0 - 4 0.000000e+00 # c : 0.0 - 11 0.000000e+00 # e- : 0.0 - 12 0.000000e+00 # ve : 0.0 - 13 0.000000e+00 # mu- : 0.0 - 14 0.000000e+00 # vm : 0.0 - 16 0.000000e+00 # vt : 0.0 - 21 0.000000e+00 # g : 0.0 - 22 0.000000e+00 # a : 0.0 - 24 8.041900e+01 # w+ : cmath.sqrt(mz__exp__2/2. + cmath.sqrt(mz__exp__4/4. - (aew*cmath.pi*mz__exp__2)/(gf*sqrt__2))) -################################### -## INFORMATION FOR SMINPUTS -################################### -BLOCK SMINPUTS # - 1 1.325070e+02 # aewm1 - 2 1.166390e-05 # gf - 3 1.300000e-01 # as (note that parameter not used if you use a pdf set) -################################### -## INFORMATION FOR YUKAWA -################################### -BLOCK YUKAWA # - 5 4.700000e+00 # ymb - 6 1.730000e+02 # ymt - 15 1.777000e+00 # ymtau -################################### -## INFORMATION FOR DECAY -################################### -DECAY 6 1.491500e+00 # wt -DECAY 23 2.441404e+00 # wz -DECAY 24 2.047600e+00 # ww -DECAY 25 6.382339e-03 # wh -DECAY 1 0.000000e+00 # d : 0.0 -DECAY 2 0.000000e+00 # u : 0.0 -DECAY 3 0.000000e+00 # s : 0.0 -DECAY 4 0.000000e+00 # c : 0.0 -DECAY 5 0.000000e+00 # b : 0.0 -DECAY 11 0.000000e+00 # e- : 0.0 -DECAY 12 0.000000e+00 # ve : 0.0 -DECAY 13 0.000000e+00 # mu- : 0.0 -DECAY 14 0.000000e+00 # vm : 0.0 -DECAY 15 0.000000e+00 # ta- : 0.0 -DECAY 16 0.000000e+00 # vt : 0.0 -DECAY 21 0.000000e+00 # g : 0.0 -DECAY 22 0.000000e+00 # a : 0.0 - - -# Number of Events : 100 -# 
Integrated weight (pb) : 66372287.22200001 - -
- -2212 2212 6.500000e+03 6.500000e+03 0 0 247000 247000 -4 1 -6.637229e+07 1.268397e+06 6.637229e+07 1 -please cite 1405.0301 - - - 5 1 +6.6372287e+07 4.60140800e+01 7.54677100e-03 1.46810800e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.8788806474e+02 1.8788806474e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -3.0556910363e+01 3.0556910363e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -1.0298827890e+01 -4.1053633424e+01 +8.3051244550e+01 9.3214676391e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 +1.7276524932e+01 -1.2156784273e+01 -1.1495329061e+01 2.4050120744e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 -6.9776970424e+00 +5.3210417698e+01 +8.5775238884e+01 1.0118017797e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.46014081E+02 -0 - 1 21 0.28905856E-01 0.46014081E+02 - 1 21 0.47010632E-02 0.46014081E+02 - 0.31830845E+06 - - - - 5 1 +6.6372287e+07 3.25558900e+01 7.54677100e-03 1.57144200e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.7974513959e+02 2.7974513959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -5.0115268359e+01 5.0115268359e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +1.1282244936e+00 +2.2858622638e+01 +3.8461797268e-02 2.2886480698e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +2.9194898468e+00 -4.2605139346e+01 -2.6389333299e+01 5.0200779193e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 503 -4.0477143403e+00 +1.9746516708e+01 +2.5598074273e+02 2.5677314806e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.32555892E+02 -0 - 1 21 0.43037713E-01 0.32555892E+02 - 1 21 0.77100414E-02 0.32555892E+02 - 0.65037882E+05 - - - - 5 1 +6.6372287e+07 3.05908400e+01 7.54677100e-03 1.59164800e-01 - 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +4.5729905700e+02 4.5729905700e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 
- 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -6.3253912877e+02 6.3253912877e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 +1.1420284484e+00 +2.8694844708e+01 +1.2159916921e+02 1.2494421273e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -2.2459074491e+01 -2.0815319355e+01 -6.3010778840e+02 6.3085141876e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 501 0 +2.1317046043e+01 -7.8795253530e+00 +3.3326854742e+02 3.3404255428e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.30590836E+02 -0 - 1 21 0.97313711E-01 0.30590836E+02 - 1 2 0.70353702E-01 0.30590836E+02 - 0.91658669E+02 - - - - 5 1 +6.6372287e+07 1.24970000e+02 7.54677100e-03 1.23511600e-01 - 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +6.4054339688e+02 6.4054339688e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -3.3928351011e+01 3.3928351011e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 505 -3.6546574781e+01 +7.3293152180e+00 +5.3085336864e+01 6.4864658942e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 503 -8.4789922053e+01 +1.0871076160e+01 +6.9212770934e+01 1.0999053977e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 +1.2133649683e+02 -1.8200391378e+01 +4.8431693807e+02 4.9961654918e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.12497005E+03 -0 - 1 21 0.98545129E-01 0.12497005E+03 - 1 21 0.52197468E-02 0.12497005E+03 - 0.21698561E+05 - - - - 5 1 +6.6372287e+07 2.09917500e+01 7.54677100e-03 1.72629600e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.9393491974e+01 2.9393491974e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -5.0612634540e+01 5.0612634540e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -2.2361493101e+01 -8.0134576492e+00 -2.5339678876e+01 3.4732566890e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 +5.3440837509e+00 +2.0304167068e+01 
+1.0307030697e+01 2.3389170854e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 503 +1.7017409350e+01 -1.2290709419e+01 -6.1864943863e+00 2.1884388769e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.20991755E+02 -0 - 1 21 0.45220758E-02 0.20991755E+02 - 1 21 0.77865590E-02 0.20991755E+02 - 0.28846636E+07 - - - - 5 1 +6.6372287e+07 2.01883800e+01 7.54677100e-03 1.74160800e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.9616331394e+01 2.9616331394e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -2.1809592212e+02 2.1809592212e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +1.4332017667e+01 -1.5898231494e+01 -1.1283261663e+02 1.1484493837e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 503 +5.5127621513e+00 +1.9607065270e+01 -9.9531289229e+01 1.0159382408e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 -1.9844779818e+01 -3.7088337755e+00 +2.3884315130e+01 3.1273491063e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.20188381E+02 -0 - 1 21 0.45563588E-02 0.20188381E+02 - 1 21 0.33553218E-01 0.20188381E+02 - 0.23199633E+06 - - - - 5 1 +6.6372287e+07 2.83114100e+01 7.54677100e-03 1.61754100e-01 - -3 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +2.8261154183e+01 2.8261154183e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -6.5070264344e+01 6.5070264344e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +7.3144338996e+00 +3.7539358060e+01 -8.3663539266e+00 3.9149715515e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -1.5538451858e+01 -1.6013356486e+01 -1.8894895213e+01 2.9238470159e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - -3 1 1 2 0 504 +8.2240179584e+00 -2.1526001574e+01 -9.5478610208e+00 2.4943232854e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.28311412E+02 -0 - 1 21 0.10010810E-01 0.28311412E+02 - 1 -3 0.43478699E-02 0.28311412E+02 - 0.75606750E+05 - - - - 5 1 
+6.6372287e+07 2.50484100e+01 7.54677100e-03 1.66030800e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +4.5278855952e+02 4.5278855952e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.3454632319e+00 3.3454632319e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 505 -1.5102249073e+01 -2.7392413109e+01 +1.7894067235e+02 1.8165402953e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 +2.1057931191e+01 +3.9670307239e+00 +9.9776507011e+01 1.0205158083e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 -5.9556821180e+00 +2.3425382385e+01 +1.7072591693e+02 1.7242841240e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.25048406E+02 -0 - 1 21 0.69659730E-01 0.25048406E+02 - 1 21 0.51468701E-03 0.25048406E+02 - 0.16161844E+07 - - - - 5 1 +6.6372287e+07 6.54738600e+01 7.54677100e-03 1.37619800e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +2.6663794394e+01 2.6663794394e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -2.5265738923e+02 2.5265738923e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 -7.6137868107e+00 +4.2439462980e+01 -1.6255497692e+02 1.6817609310e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -1.7587948234e+01 +1.0621679064e+01 -4.5177420050e+01 4.9630185085e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 +2.5201735045e+01 -5.3061142044e+01 -1.8261197867e+01 6.1514905444e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.65473858E+02 -0 - 1 21 0.41021221E-02 0.65473858E+02 - 1 1 0.38870368E-01 0.65473858E+02 - 0.41073273E+05 - - - - 5 1 +6.6372287e+07 4.71053000e+01 7.54677100e-03 1.46161100e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.0723487937e+02 1.0723487937e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.2434583342e+02 1.2434583342e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 
504 -1.5074548460e+01 +4.4668996332e+01 +7.0907382043e+01 8.5149386802e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 -3.7446327852e+01 -2.8577640944e+01 -7.7213750461e+01 9.0448174619e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 +5.2520876312e+01 -1.6091355388e+01 -1.0804585631e+01 5.5983151371e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.47105297E+02 -0 - 1 21 0.16497674E-01 0.47105297E+02 - 1 21 0.19130128E-01 0.47105297E+02 - 0.81247298E+05 - - - - 5 1 +6.6372287e+07 4.77488600e+01 7.54677100e-03 1.45787600e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +1.3351097238e+02 1.3351097238e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -2.1959914093e+03 2.1959914093e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -2.6534412892e+01 +2.0887502154e+01 -2.0204850067e+03 2.0207671872e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +2.7723341226e+01 -6.8071401227e+01 -1.6302498162e+02 1.7882797305e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 504 -1.1889283334e+00 +4.7183899072e+01 +1.2102955134e+02 1.2990722143e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.47748865E+02 -0 - 1 21 0.20540149E-01 0.47748865E+02 - 1 21 0.33784484E+00 0.47748865E+02 - 0.69049208E+02 - - - - 5 1 +6.6372287e+07 5.17648700e+01 7.54677100e-03 1.43604800e-01 - -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +7.0572435077e+02 7.0572435077e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -8.6069634546e+00 8.6069634546e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +2.3775888591e+01 -5.0832360721e+00 +1.9988986553e+01 3.1475256166e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +2.1064391322e+01 -6.4983212153e+00 +5.8809348241e+01 6.2805065090e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - -2 1 1 2 0 502 -4.4840279913e+01 +1.1581557287e+01 +6.1831905252e+02 
6.2005099297e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.51764867E+02 -0 - 1 21 0.13241488E-02 0.51764867E+02 - 1 -2 0.10857293E+00 0.51764867E+02 - 0.12387408E+05 - - - - 5 1 +6.6372287e+07 2.68215700e+01 7.54677100e-03 1.63613700e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +4.7995183998e+01 4.7995183998e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -4.6755507222e+02 4.6755507222e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 -5.6572765501e+00 -2.1816941248e+01 +1.1968309353e+01 2.5519093482e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -2.3663698968e+01 +1.2778475361e+01 -4.5222293332e+02 4.5302189959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 503 +2.9320975518e+01 +9.0384658865e+00 +2.0694735751e+01 3.7009263148e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.26821571E+02 -0 - 1 21 0.73838749E-02 0.26821571E+02 - 1 21 0.71931546E-01 0.26821571E+02 - 0.24837378E+05 - - - - 5 1 +6.6372287e+07 4.31543000e+01 7.54677100e-03 1.48620400e-01 - 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +7.1563261884e+02 7.1563261884e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.4644760996e+01 1.4644760996e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +1.2645094462e+01 -4.3267730831e+01 +6.6587442685e+02 6.6739849211e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 505 -1.7724111495e+01 +1.5186816241e+01 +1.2556432303e+01 2.6503726304e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 503 +5.0790170329e+00 +2.8080914591e+01 +2.2556998696e+01 3.6375161422e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.43154296E+02 -0 - 1 21 0.11009730E+00 0.43154296E+02 - 1 21 0.22530407E-02 0.43154296E+02 - 0.66560154E+05 - - - - 5 1 +6.6372287e+07 4.37774800e+01 7.54677100e-03 1.48212100e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.6494856549e+02 
2.6494856549e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -4.4840936233e+01 4.4840936233e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 -2.3919572741e+01 -2.1836315356e+01 -2.8033165864e+01 4.2834904188e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +2.0968394452e+01 -2.8904789122e+01 +4.4445065176e+01 5.7013368771e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 +2.9511782884e+00 +5.0741104479e+01 +2.0369572994e+02 2.0994122876e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.43777479E+02 -0 - 1 21 0.40761317E-01 0.43777479E+02 - 1 21 0.68986058E-02 0.43777479E+02 - 0.88070658E+05 - - - - 5 1 +6.6372287e+07 3.19042100e+01 7.54677100e-03 1.57794600e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +3.5412150098e+01 3.5412150098e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -7.5668427371e+02 7.5668427371e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 +4.2078004819e+00 -3.1373137318e+01 -7.1649764593e+02 7.1719652534e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 +1.7730843460e+01 -1.5141321578e+01 -1.1865670592e+00 2.3346313849e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 503 -2.1938643942e+01 +4.6514458896e+01 -3.5879106177e+00 5.1553584618e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.31904206E+02 -0 - 1 21 0.54480234E-02 0.31904206E+02 - 1 21 0.11641296E+00 0.31904206E+02 - 0.14302972E+05 - - - - 5 1 +6.6372287e+07 2.62752100e+01 7.54677100e-03 1.64333300e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +7.3680834147e+01 7.3680834147e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -4.3076387169e+02 4.3076387169e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +5.3624128570e+00 +1.9370699338e+01 -1.8582414279e+02 1.8690797605e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 
504 502 +1.5600742238e+01 -2.1293512835e+01 +7.0284358620e+01 7.5077878991e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -2.0963155095e+01 +1.9228134974e+00 -2.4154325337e+02 2.4245885080e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.26275207E+02 -0 - 1 21 0.11335513E-01 0.26275207E+02 - 1 2 0.66271366E-01 0.26275207E+02 - 0.56080712E+04 - - - - 5 1 +6.6372287e+07 4.55308900e+01 7.54677100e-03 1.47105400e-01 - 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +2.1459625930e+03 2.1459625930e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -5.5418446222e+00 5.5418446222e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +3.4560110742e+01 -1.7822362191e+01 +1.6729667012e+02 1.7175626242e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +8.6373215770e+00 +3.7927160061e+01 +1.2285930833e+02 1.2887002149e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -4.3197432319e+01 -2.0104797870e+01 +1.8502647699e+03 1.8508781537e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.45530891E+02 -0 - 1 21 0.85259319E-03 0.45530891E+02 - 1 2 0.33014743E+00 0.45530891E+02 - 0.37889394E+05 - - - - 5 1 +6.6372287e+07 3.49649300e+01 7.54677100e-03 1.54891200e-01 - -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +8.0100600886e+00 8.0100600886e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 -1 0 0 503 0 +0.0000000000e+00 -0.0000000000e+00 -1.6748462249e+03 1.6748462249e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 -2.8388621465e+01 +1.5105638110e+01 -1.9595061691e+02 1.9857174623e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 502 0 +5.9035355306e+00 -3.7141587409e+01 -1.4389449039e+03 1.4394362736e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -2 1 1 2 0 502 +2.2485085934e+01 +2.2035949299e+01 -3.1940644021e+01 4.4848265200e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.34964932E+02 -0 - 1 2 0.25766864E+00 0.34964932E+02 - 1 -2 0.12323170E-02 0.34964932E+02 - 
0.15263237E+04 - - - - 5 1 +6.6372287e+07 3.04072400e+01 7.54677100e-03 1.59363000e-01 - 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +2.3031354025e+01 2.3031354025e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.8224559169e+02 1.8224559169e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +2.5754317368e+01 -4.7408923451e+01 -9.5689678327e+01 1.0985174293e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -1.1009373966e+01 +2.4882397341e+01 -8.6110655342e-01 2.7222812438e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 503 -1.4744943402e+01 +2.2526526110e+01 -6.2663452789e+01 6.8202390354e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.30407236E+02 -0 - 1 21 0.35432851E-02 0.30407236E+02 - 1 21 0.28037785E-01 0.30407236E+02 - 0.50703811E+06 - - - - 5 1 +6.6372287e+07 2.46316000e+01 7.54677100e-03 1.66635000e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +8.0590561410e+01 8.0590561410e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -6.2809489236e+02 6.2809489236e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -1.7815991771e+01 +2.0155583443e+01 -8.5520591269e+00 2.8227554305e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 -7.5126314979e+00 -1.9813252642e+01 -6.0724710539e+02 6.0761669795e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 503 +2.5328623269e+01 -3.4233080119e-01 +6.8294833568e+01 7.2841201522e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.24631597E+02 -0 - 1 21 0.96629979E-01 0.24631597E+02 - 1 -1 0.12398548E-01 0.24631597E+02 - 0.37172940E+03 - - - - 5 1 +6.6372287e+07 2.48386400e+01 7.54677100e-03 1.66333000e-01 - 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +3.4621419117e+02 3.4621419117e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -1.8471995540e+01 1.8471995540e+01 0.0000000000e+00 
0.0000e+00 -1.0000e+00 - 21 1 1 2 504 503 +2.4946999999e+01 +1.5290445725e+00 +2.8662250343e+02 2.8771018449e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 501 0 -1.3234624373e+00 -2.4231607655e+01 -1.6739991815e-01 2.4268300005e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - -2 1 1 2 0 502 -2.3623537562e+01 +2.2702563083e+01 +4.1287092120e+01 5.2707702219e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.24838643E+02 -0 - 1 21 0.53263717E-01 0.24838643E+02 - 1 21 0.28418457E-02 0.24838643E+02 - 0.20767655E+06 - - - - 5 1 +6.6372287e+07 3.39483100e+01 7.54677100e-03 1.55814300e-01 - -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +2.5448573077e+01 2.5448573077e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 -1 0 0 503 0 +0.0000000000e+00 -0.0000000000e+00 -1.0820396951e+03 1.0820396951e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +4.5361883356e+01 +2.5711927708e+01 -5.3689272592e+02 5.3941876389e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 502 0 -1.6783464189e+01 -2.3932337766e+01 -5.3486766152e+02 5.3566580701e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -2 1 1 2 0 502 -2.8578419167e+01 -1.7795899428e+00 +1.5169265433e+01 3.2403697261e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.33948308E+02 -0 - 1 2 0.16646767E+00 0.33948308E+02 - 1 -2 0.39151646E-02 0.33948308E+02 - 0.59650818E+03 - - - - 5 1 +6.6372287e+07 4.00572800e+01 7.54677100e-03 1.50779000e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +3.2066229463e+01 3.2066229463e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.0083738526e+02 3.0083738526e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 +3.4238964599e+01 -3.1475020468e+00 -2.1121471239e+02 2.1399501909e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 -1.6612873637e+01 +3.9603631259e+01 +6.1874354643e+00 4.3390316167e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 502 -1.7626090962e+01 -3.6456129213e+01 
-6.3743878874e+01 7.5518279467e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.40057279E+02 -0 - 1 21 0.49332658E-02 0.40057279E+02 - 1 21 0.46282677E-01 0.40057279E+02 - 0.11855536E+06 - - - - 5 1 +6.6372287e+07 4.37051900e+01 7.54677100e-03 1.48259100e-01 - 2 -1 0 0 502 0 +0.0000000000e+00 +0.0000000000e+00 +1.7110304904e+03 1.7110304904e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -2.0783382913e+01 2.0783382913e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -1.4418127206e+01 +2.0747890384e+01 +7.9570356529e+01 8.3485321978e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 +3.6111610870e+01 -3.0183395268e+01 +1.6215922235e+03 1.6222750769e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 1 1 2 502 0 -2.1693483664e+01 +9.4355048838e+00 -1.0915472595e+01 2.6053474392e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.43705192E+02 -0 - 1 2 0.26323535E+00 0.43705192E+02 - 1 1 0.31974449E-02 0.43705192E+02 - 0.43186860E+03 - - - - 5 1 +6.6372287e+07 3.25233300e+01 7.54677100e-03 1.57176200e-01 - 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +2.1530668898e+01 2.1530668898e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -5.8911499310e+02 5.8911499310e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 +3.1914103860e+01 -3.6894354070e+01 -1.3312376381e+02 1.4178025208e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 -1.0765125773e+01 +2.9189748902e+01 +2.6051922163e+00 3.1220448433e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -2.1148978087e+01 +7.7046051684e+00 -4.3706575261e+02 4.3764496149e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.32523330E+02 -0 - 1 21 0.33124105E-02 0.32523330E+02 - 1 2 0.90633079E-01 0.32523330E+02 - 0.30633976E+05 - - - - 5 1 +6.6372287e+07 3.61852100e+01 7.54677100e-03 1.53832100e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +4.3002025114e+01 
4.3002025114e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.3404139915e+02 3.3404139915e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 505 +5.4545664238e+00 -2.1319807632e+01 -2.9481524350e+02 2.9563544153e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +3.1100319594e+01 -8.7134369374e+00 +9.7127797801e+00 3.3726724614e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 -3.6554886018e+01 +3.0033244569e+01 -5.9369103163e+00 4.7681258113e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.36185209E+02 -0 - 1 21 0.66156963E-02 0.36185209E+02 - 1 21 0.51390983E-01 0.36185209E+02 - 0.59410236E+05 - - - - 5 1 +6.6372287e+07 2.68952500e+01 7.54677100e-03 1.63518300e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +2.8770752959e+02 2.8770752959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -2.6931152162e+02 2.6931152162e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +2.0961573832e+01 +2.3688081609e+00 -1.7971774862e+02 1.8095156257e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 505 -3.7349663467e+00 -2.3130388947e+01 -8.6786083310e+01 8.9893209547e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 -1.7226607485e+01 +2.0761580786e+01 +2.8489983990e+02 2.8617427909e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.26895249E+02 -0 - 1 21 0.44262697E-01 0.26895249E+02 - 1 21 0.41432541E-01 0.26895249E+02 - 0.32158164E+04 - - - - 5 1 +6.6372287e+07 2.51016900e+01 7.54677100e-03 1.65954600e-01 - 2 -1 0 0 504 0 -0.0000000000e+00 +0.0000000000e+00 +6.4617848855e+01 6.4617848855e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -4.5852280566e+01 4.5852280566e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +1.2633388858e+01 +1.7296317379e+01 -2.9732559349e+01 3.6644101767e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 
503 +1.3130515800e+01 -2.4553339855e+01 +1.7570358035e+01 3.2924070597e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -2.5763904658e+01 +7.2570224764e+00 +3.0927769604e+01 4.0901957057e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.25101687E+02 -0 - 1 21 0.70541970E-02 0.25101687E+02 - 1 2 0.99412075E-02 0.25101687E+02 - 0.89083039E+05 - - - - 5 1 +6.6372287e+07 2.65415900e+01 7.54677100e-03 1.63979800e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +7.3037786153e+01 7.3037786153e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -8.2017257442e+01 8.2017257442e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 505 +1.8697084487e+01 -8.2924898880e+00 -5.9625078565e+01 6.3035675222e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +6.9951105287e+00 +2.5911255642e+01 +5.4380851637e+01 6.0643233464e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 -2.5692195015e+01 -1.7618765754e+01 -3.7352443596e+00 3.1376134909e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.26541591E+02 -0 - 1 21 0.11236582E-01 0.26541591E+02 - 1 21 0.12618040E-01 0.26541591E+02 - 0.30903565E+06 - - - - 5 1 +6.6372287e+07 2.27761200e+01 7.54677100e-03 1.69516500e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +8.6173848945e+01 8.6173848945e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -6.3585034087e+01 6.3585034087e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -2.7032116927e+01 +8.2973252626e+00 +1.6307827832e+01 3.2642398819e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +1.9247818195e+01 +1.2926007751e+01 -5.0735466398e+01 5.5782145282e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 503 +7.7842987316e+00 -2.1223333014e+01 +5.7016453425e+01 6.1334338931e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.22776118E+02 -0 - 1 21 0.13257515E-01 0.22776118E+02 - 1 21 0.97823130E-02 
0.22776118E+02 - 0.35046139E+06 - - - - 5 1 +6.6372287e+07 3.80456700e+01 7.54677100e-03 1.52310600e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.3757684306e+01 1.3757684306e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.9694458511e+02 3.9694458511e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 505 -1.4438846203e+01 -1.7702498483e+01 -8.3619869477e+01 8.6684146783e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +1.4547368666e+01 -2.2261281669e+01 -2.7669903846e+02 2.7797400684e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 -1.0852246356e-01 +3.9963780152e+01 -2.2867992874e+01 4.6044115794e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.38045671E+02 -0 - 1 21 0.21165667E-02 0.38045671E+02 - 1 21 0.61068400E-01 0.38045671E+02 - 0.26069372E+06 - - - - 5 1 +6.6372287e+07 4.59126200e+01 7.54677100e-03 1.46872300e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +2.0311609080e+03 2.0311609080e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -2.4721053331e+01 2.4721053331e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +5.4574055215e+00 +4.5277462040e+01 +1.8656515221e+03 1.8662088398e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +3.1344441190e+01 +1.9716704689e+01 +9.5251287709e+01 1.0219603832e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 -3.6801846712e+01 -6.4994166729e+01 +4.5537044828e+01 8.7477083181e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.45912622E+02 -0 - 1 21 0.31248616E+00 0.45912622E+02 - 1 21 0.38032406E-02 0.45912622E+02 - 0.16431983E+04 - - - - 5 1 +6.6372287e+07 3.23160000e+01 7.54677100e-03 1.57381400e-01 - 1 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +5.8873030751e+02 5.8873030751e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -2.0960881505e+01 2.0960881505e+01 
0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 +1.2271827274e+01 +2.2294515262e+01 +1.4647225131e+02 1.4866661885e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +2.0219290079e+01 -9.7210766525e-02 -1.3589093295e+01 2.4361703508e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 1 1 2 501 0 -3.2491117354e+01 -2.2197304495e+01 +4.3488626799e+02 4.3666286666e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.32316004E+02 -0 - 1 21 0.32247515E-02 0.32316004E+02 - 1 1 0.90573881E-01 0.32316004E+02 - 0.20132875E+05 - - - - 5 1 +6.6372287e+07 4.71162200e+01 7.54677100e-03 1.46154700e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +3.5698751231e+01 3.5698751231e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.5024197302e+02 1.5024197302e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 505 -1.8393329399e+01 -2.0730266036e+01 +1.5134275211e+01 3.1576966011e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +3.0318818959e+01 +4.1981229081e+01 -1.1447233348e+02 1.2564063639e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 -1.1925489561e+01 -2.1250963045e+01 -1.5205163517e+01 2.8723121856e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.47116224E+02 -0 - 1 21 0.54921154E-02 0.47116224E+02 - 1 21 0.23114150E-01 0.47116224E+02 - 0.37322948E+06 - - - - 5 1 +6.6372287e+07 5.00477800e+01 7.54677100e-03 1.44508400e-01 - 2 -1 0 0 503 0 +0.0000000000e+00 +0.0000000000e+00 +3.8911714874e+02 3.8911714874e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 502 0 -0.0000000000e+00 -0.0000000000e+00 -3.0237812812e+02 3.0237812812e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +2.7035206944e+01 +1.1996257553e+01 +1.2886486117e+02 1.3221560064e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -2.2748413249e+01 -4.4788353833e+01 -2.9753120134e+02 3.0174211293e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 502 0 -4.2867936948e+00 
+3.2792096280e+01 +2.5540536079e+02 2.5753756328e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.50047775E+02 -0 - 1 2 0.59864176E-01 0.50047775E+02 - 1 2 0.46519712E-01 0.50047775E+02 - 0.13730376E+03 - - - - 5 1 +6.6372287e+07 2.97093000e+01 7.54677100e-03 1.60132400e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.1854806368e+02 1.1854806368e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -2.8789738375e+02 2.8789738375e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -2.1808462259e+01 +1.9091314842e+01 +2.7760747717e+01 4.0134105724e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +5.0913227713e+01 -2.3763320633e+01 +7.4385366719e+01 9.3220356927e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 -2.9104765455e+01 +4.6720057908e+00 -2.7149543450e+02 2.7309098477e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.29709305E+02 -0 - 1 21 0.18238164E-01 0.29709305E+02 - 1 1 0.44291905E-01 0.29709305E+02 - 0.26942502E+04 - - - - 5 1 +6.6372287e+07 2.76081800e+01 7.54677100e-03 1.62613900e-01 - 1 -1 0 0 501 0 +0.0000000000e+00 +0.0000000000e+00 +3.5918383131e+02 3.5918383131e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -4 -1 0 0 0 501 -0.0000000000e+00 -0.0000000000e+00 -9.9599640123e+00 9.9599640123e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -1.0226056517e+01 -1.7566903028e+01 +5.4113018863e+01 5.7804732716e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 502 0 -1.8623508510e+01 -1.7954792984e+01 +2.3170430073e+02 2.3314393117e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -4 1 1 2 0 503 +2.8849565027e+01 +3.5521696012e+01 +6.3406547700e+01 7.8195131441e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.27608180E+02 -0 - 1 1 0.55259041E-01 0.27608180E+02 - 1 -4 0.15323024E-02 0.27608180E+02 - 0.31455192E+04 - - - - 5 1 +6.6372287e+07 3.28240500e+01 7.54677100e-03 1.56881800e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 
+5.7881981423e+00 5.7881981423e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.2709795693e+03 1.2709795693e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -2.9221921883e+01 +8.0360733545e+00 -5.6807285970e+02 5.6888071959e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +1.6963846300e+01 -2.9307832371e+01 -5.0500567831e+01 6.0803194577e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 503 +1.2258075583e+01 +2.1271759017e+01 -6.4661794361e+02 6.4708385326e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.32824047E+02 -0 - 1 21 0.89049153E-03 0.32824047E+02 - 1 21 0.19553543E+00 0.32824047E+02 - 0.68369125E+05 - - - - 5 1 +6.6372287e+07 2.96747300e+01 7.54677100e-03 1.60171200e-01 - 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +4.0441816137e+01 4.0441816137e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.0683900114e+02 1.0683900114e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -2.6554357024e+01 +3.2973328106e+01 +7.1332905671e+00 4.2933181546e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 +2.0427248287e+01 -1.1559769425e+01 +3.8860694556e+00 2.3790802373e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 501 0 +6.1271087369e+00 -2.1413558681e+01 -7.7416545026e+01 8.0556833358e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.29674731E+02 -0 - 1 21 0.62218180E-02 0.29674731E+02 - 1 2 0.16436769E-01 0.29674731E+02 - 0.63902127E+05 - - - - 5 1 +6.6372287e+07 2.77849600e+01 7.54677100e-03 1.62394800e-01 - -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +5.5901367143e+01 5.5901367143e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 4 -1 0 0 501 0 +0.0000000000e+00 -0.0000000000e+00 -1.6721775392e+02 1.6721775392e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -1.7214919673e+01 +1.5725661972e+01 -4.6993551561e+00 2.3785160136e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 4 
1 1 2 502 0 -7.1368635003e+00 -2.7166369610e+01 -1.4763278291e+02 1.5028102025e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -2 1 1 2 0 503 +2.4351783173e+01 +1.1440707638e+01 +4.1015751290e+01 4.9052940675e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.27784960E+02 -0 - 1 4 0.25725808E-01 0.27784960E+02 - 1 -2 0.86002106E-02 0.27784960E+02 - 0.26447976E+03 - - - - 5 1 +6.6372287e+07 5.39590300e+01 7.54677100e-03 1.42508700e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +5.9800586950e+01 5.9800586950e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -7.5246990846e+01 7.5246990846e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 505 +1.7215966258e+01 -1.3481800279e+01 -1.3120982800e+01 2.5501149436e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 503 -4.4948096317e+01 +2.7284215874e+01 +3.3376063191e+01 6.2279381760e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 502 +2.7732130059e+01 -1.3802415596e+01 -3.5701484287e+01 4.7267046599e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.53959031E+02 -0 - 1 21 0.92000904E-02 0.53959031E+02 - 1 21 0.11576460E-01 0.53959031E+02 - 0.54280239E+06 - - - - 5 1 +6.6372287e+07 3.74932000e+01 7.54677100e-03 1.52751300e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +7.5616997299e+02 7.5616997299e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -9.5383624010e+00 9.5383624010e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +2.4198598086e+01 -2.8579386523e+01 +4.8877048754e+01 6.1573690634e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -4.1183674764e+01 +1.1831705659e+01 +1.5380528884e+02 1.5966261679e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 504 +1.6985076678e+01 +1.6747680864e+01 +5.4394927300e+02 5.4447202797e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.37493203E+02 -0 - 1 21 0.11633379E+00 0.37493203E+02 - 1 21 
0.14674410E-02 0.37493203E+02 - 0.11386933E+06 - - - - 5 1 +6.6372287e+07 4.04284600e+01 7.54677100e-03 1.50508200e-01 - 21 -1 0 0 502 503 +0.0000000000e+00 +0.0000000000e+00 +4.0351583823e+02 4.0351583823e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 4 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.3201441918e+01 1.3201441918e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 4 1 1 2 501 0 +4.2439120380e+01 +1.6559287496e+01 +4.1887418346e+01 6.1885739526e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 4 1 1 2 502 0 -2.1117804890e+01 +8.9613985453e+00 +2.4763202138e+02 2.4869235283e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -4 1 1 2 0 501 -2.1321315491e+01 -2.5520686042e+01 +1.0079495659e+02 1.0613918778e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.40428461E+02 -0 - 1 21 0.62079350E-01 0.40428461E+02 - 1 4 0.20309914E-02 0.40428461E+02 - 0.80181641E+04 - - - - 5 1 +6.6372287e+07 3.42643000e+01 7.54677100e-03 1.55523200e-01 - 1 -1 0 0 504 0 -0.0000000000e+00 +0.0000000000e+00 +3.3344584104e+03 3.3344584104e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -9.8032652357e-01 9.8032652357e-01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -1.6378085835e+01 +1.3734600814e+01 +2.6780940543e+02 2.6866104777e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -2.1482442393e+01 -1.5710524314e+01 +4.2265106582e+02 4.2348818088e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 +3.7860528228e+01 +1.9759235008e+00 +2.6430176126e+03 2.6432895082e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.34264300E+02 -0 - 1 21 0.15082213E-03 0.34264300E+02 - 1 1 0.51298452E+00 0.34264300E+02 - 0.33282672E+05 - - - - 5 1 +6.6372287e+07 1.19571300e+02 7.54677100e-03 1.24380600e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +8.7854154861e+01 8.7854154861e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 503 0 +0.0000000000e+00 -0.0000000000e+00 -1.7934232150e+03 1.7934232150e+03 
0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +1.7605310395e+01 +1.3701422770e+01 +2.6274785914e+01 3.4467960701e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 502 0 +1.1756210603e+02 +1.7577983299e+01 -1.7004020433e+03 1.7045518306e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 502 -1.3516741643e+02 -3.1279406068e+01 -3.1441802775e+01 1.4225757860e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.11957126E+03 -0 - 1 2 0.27591127E+00 0.11957126E+03 - 1 -1 0.13516023E-01 0.11957126E+03 - 0.43015636E+02 - - - - 5 1 +6.6372287e+07 2.58481600e+01 7.54677100e-03 1.64911000e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.4121175553e+01 1.4121175553e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -4.5285316425e+02 4.5285316425e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 505 +1.4043566251e+01 +1.9773590139e+01 -2.4347029469e+02 2.4467529711e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 -2.6394599884e+01 -1.3369434684e+00 -6.6055666935e-01 2.6436691472e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 +1.2351033633e+01 -1.8436646670e+01 -1.9460113733e+02 1.9586235121e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.25848162E+02 -0 - 1 21 0.21724885E-02 0.25848162E+02 - 1 21 0.69669719E-01 0.25848162E+02 - 0.18427689E+06 - - - - 5 1 +6.6372287e+07 5.64794400e+01 7.54677100e-03 1.41322900e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +3.7881487994e+00 3.7881487994e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 -1 0 0 502 0 -0.0000000000e+00 -0.0000000000e+00 -8.3293306677e+02 8.3293306677e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +2.2042587179e+01 +4.9935512809e+01 -3.4469253261e+02 3.4898764036e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 -1.0400442402e+01 -2.1391345840e+01 -1.5133248122e+02 1.5319033504e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 -1.1642144777e+01 
-2.8544166969e+01 -3.3311990414e+02 3.3454324016e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.56479436E+02 -0 - 1 21 0.58279180E-03 0.56479436E+02 - 1 1 0.12814362E+00 0.56479436E+02 - 0.18915759E+06 - - - - 5 1 +6.6372287e+07 1.20437600e+02 7.54677100e-03 1.24237700e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +7.6326880933e+01 7.6326880933e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -5.6424004213e+02 5.6424004213e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 -1.3358633684e+02 -3.2803021929e+01 -2.5791576009e+01 1.3995196687e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 +9.1619991808e+01 +2.3833933827e+01 -1.0682884494e+02 1.4273990827e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 1 1 2 501 0 +4.1966345031e+01 +8.9690881026e+00 -3.5529274025e+02 3.5787504793e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.12043761E+03 -0 - 1 21 0.11742597E-01 0.12043761E+03 - 1 1 0.86806160E-01 0.12043761E+03 - 0.26061044E+04 - - - - 5 1 +6.6372287e+07 5.37227900e+01 7.54677100e-03 1.42623700e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.3304377309e+02 1.3304377309e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -7.0963777946e+01 7.0963777946e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 -1.5841020033e+01 +3.7513476754e+01 -1.3490766536e+01 4.2897548130e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 -5.0497052458e+01 -2.8085691040e+00 +6.9558481825e+01 8.6001295113e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 +6.6338072490e+01 -3.4704907650e+01 +6.0122798510e+00 7.5108707789e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.53722789E+02 -0 - 1 21 0.20468273E-01 0.53722789E+02 - 1 21 0.10917504E-01 0.53722789E+02 - 0.14842924E+06 - - - - 5 1 +6.6372287e+07 4.63666500e+01 7.54677100e-03 1.46598600e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 
+0.0000000000e+00 +7.9404929403e+01 7.9404929403e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -4.0571876305e+01 4.0571876305e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +3.8096004013e+01 -2.0103472745e+01 +5.5898111257e+01 7.0569497520e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 -2.4864947573e+01 +3.4826673187e+00 -8.6196401551e+00 2.6546050287e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 502 -1.3231056440e+01 +1.6620805427e+01 -8.4454180038e+00 2.2861257901e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.46366646E+02 -0 - 1 21 0.12216143E-01 0.46366646E+02 - 1 21 0.62418271E-02 0.46366646E+02 - 0.93143569E+06 - - - - 5 1 +6.6372287e+07 3.23809700e+01 7.54677100e-03 1.57316900e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.7524908213e+01 2.7524908213e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -1.6414512109e+02 1.6414512109e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 505 +1.0735752809e+01 +2.3888282841e+01 -4.2028929504e+01 4.9521079963e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 +8.4348718406e+00 -4.8157198822e+01 -9.9473441559e+01 1.1083874971e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 -1.9170624649e+01 +2.4268915981e+01 +4.8821581875e+00 3.1310199631e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.32380969E+02 -0 - 1 21 0.42346013E-02 0.32380969E+02 - 1 21 0.25253095E-01 0.32380969E+02 - 0.46542943E+06 - - - - 5 1 +6.6372287e+07 3.04619800e+01 7.54677100e-03 1.59303800e-01 - 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +2.2711935090e+03 2.2711935090e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -2.7585117854e+00 2.7585117854e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +2.0648195639e+01 -1.1330743851e+01 +3.5163773314e+02 3.5242563628e+02 
0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -2.3394837542e+01 -2.0181653530e+01 +1.0496115767e+02 1.0941417724e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 +2.7466419032e+00 +3.1512397381e+01 +1.8118361064e+03 1.8121122073e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.30461979E+02 -0 - 1 21 0.42438824E-03 0.30461979E+02 - 1 2 0.34941289E+00 0.30461979E+02 - 0.91450792E+05 - - - - 5 1 +6.6372287e+07 5.05952900e+01 7.54677100e-03 1.44215700e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.7310225510e+01 1.7310225510e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 502 0 -0.0000000000e+00 -0.0000000000e+00 -1.2474989233e+03 1.2474989233e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 -2.0960667079e+01 -3.2621697190e+00 -1.3563546253e-01 2.1213432364e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -3.0621090753e+01 +1.1876026904e+01 -3.7308678078e+01 4.9705418959e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 +5.1581757832e+01 -8.6138571850e+00 -1.1927443843e+03 1.1938902975e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.50595294E+02 -0 - 1 21 0.26631115E-02 0.50595294E+02 - 1 2 0.19192292E+00 0.50595294E+02 - 0.17875597E+05 - - - - 5 1 +6.6372287e+07 5.35686700e+01 7.54677100e-03 1.42699200e-01 - 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +2.0775922111e+01 2.0775922111e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.9382658620e+02 1.9382658620e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 -2.0032585171e+01 +8.2484330843e+00 -1.7029580071e+01 2.7556264521e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 -3.0721524725e+01 +4.2318877201e+00 -1.1017363691e+01 3.2910534147e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 504 +5.0754109896e+01 -1.2480320804e+01 -1.4500372033e+02 1.5413570964e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.53568673E+02 -0 - 1 21 
0.31962958E-02 0.53568673E+02 - 1 21 0.29819474E-01 0.53568673E+02 - 0.56798716E+06 - - - - 5 1 +6.6372287e+07 3.85025300e+01 7.54677100e-03 1.51953000e-01 - 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +2.2289530809e+02 2.2289530809e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 3 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.1799706113e+01 1.1799706113e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +2.1037505294e+01 +3.1358606708e+01 +8.1763268338e+01 9.0062039134e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 +1.7522488983e+01 -1.3904294537e+01 +4.7427977092e+01 5.2438345109e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 3 1 1 2 501 0 -3.8559994278e+01 -1.7454312171e+01 +8.1904356543e+01 9.2194629956e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.38502528E+02 -0 - 1 21 0.34291583E-01 0.38502528E+02 - 1 3 0.18153396E-02 0.38502528E+02 - 0.34255031E+05 - - - - 5 1 +6.6372287e+07 3.75647200e+01 7.54677100e-03 1.52693700e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +5.1007388093e+00 5.1007388093e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.1898429651e+03 1.1898429651e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -2.3577055731e+01 +8.6164758199e+00 -5.6840562318e+02 5.6895964152e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -1.7661209766e+01 -3.2338872055e+01 -8.6569325177e+01 9.4084903347e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 504 +4.1238265497e+01 +2.3722396235e+01 -5.2976727797e+02 5.3189915908e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.37564724E+02 -0 - 1 21 0.78472893E-03 0.37564724E+02 - 1 21 0.18305279E+00 0.37564724E+02 - 0.98499507E+05 - - - - 5 1 +6.6372287e+07 1.43855200e+02 7.54677100e-03 1.20823400e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.7613958321e+02 1.7613958321e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 503 -0.0000000000e+00 
-0.0000000000e+00 -2.5721546644e+02 2.5721546644e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +1.8697441469e+01 +4.6743956885e+01 +6.3444037986e+01 8.0992208136e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 1 1 2 501 0 +1.5605198954e+01 -1.3052773015e+02 -2.0496954320e+02 2.4350261644e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -1 1 1 2 0 504 -3.4302640423e+01 +8.3783773266e+01 +6.0449621991e+01 1.0886022507e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.14385516E+03 -0 - 1 21 0.27098398E-01 0.14385516E+03 - 1 21 0.39571610E-01 0.14385516E+03 - 0.81522626E+04 - - - - 5 1 +6.6372287e+07 2.99475600e+01 7.54677100e-03 1.59866900e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.1735167452e+02 1.1735167452e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -6.7731005686e+01 6.7731005686e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -3.0037339775e+01 +1.9906567203e+01 +7.7442255748e+01 8.5415549954e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 505 +3.9581584148e+00 -2.3241579138e+01 -5.6222098856e+01 6.0965255831e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 +2.6079181360e+01 +3.3350119355e+00 +2.8400511938e+01 3.8701874417e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.29947557E+02 -0 - 1 21 0.18054104E-01 0.29947557E+02 - 1 21 0.10420155E-01 0.29947557E+02 - 0.19218372E+06 - - - - 5 1 +6.6372287e+07 5.51267100e+01 7.54677100e-03 1.41950100e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +3.3605621517e+02 3.3605621517e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.6748192173e+02 1.6748192173e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -3.6502084271e+01 +1.2779288039e+01 +2.6845966100e+00 3.8767504664e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 505 +5.6330187720e+01 -2.0382049136e+01 -1.4305805077e+02 1.5509391950e+02 0.0000000000e+00 
0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 -1.9828103448e+01 +7.6027610964e+00 +3.0894774760e+02 3.0967671273e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.55126711E+02 -0 - 1 21 0.51700956E-01 0.55126711E+02 - 1 21 0.25766450E-01 0.55126711E+02 - 0.55367291E+04 - - - - 5 1 +6.6372287e+07 2.38060800e+01 7.54677100e-03 1.67876700e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +5.6292151055e+01 5.6292151055e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -1.0100350383e+02 1.0100350383e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 -1.7078783867e+01 +1.3980353782e+01 +4.3115131723e+01 4.8436037552e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 +1.9550036137e+01 +1.1807248332e+01 -6.8557268747e+00 2.3845670826e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 -2.4712522707e+00 -2.5787602114e+01 -8.0970757619e+01 8.5013946502e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.23806083E+02 -0 - 1 21 0.86603311E-02 0.23806083E+02 - 1 21 0.15539000E-01 0.23806083E+02 - 0.32926995E+06 - - - - 5 1 +6.6372287e+07 4.37816800e+01 7.54677100e-03 1.48209400e-01 - -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +7.6069687389e+01 7.6069687389e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -6.3690305890e+01 6.3690305890e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -3.1851837126e+01 -2.0905814741e+01 -1.0380698209e+01 3.9488625117e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 +5.2242018124e+01 +1.1087919075e+01 -1.5683784728e+01 5.5661041227e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -2 1 1 2 0 503 -2.0390180998e+01 +9.8178956656e+00 +3.8443864437e+01 4.4610326936e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.43781676E+02 -0 - 1 21 0.97985087E-02 0.43781676E+02 - 1 -2 0.11703029E-01 0.43781676E+02 - 0.29443686E+05 - - - - 5 1 +6.6372287e+07 6.86898000e+01 7.54677100e-03 
1.36460600e-01 - 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +2.1667938332e+02 2.1667938332e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.5467804492e+02 1.5467804492e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 505 -1.1038053112e+01 +2.3433804553e+01 -2.7774504919e+01 3.7979006514e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 -1.5088204651e+01 +3.2631253632e+01 -1.1446451215e+02 1.1997740276e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 +2.6126257764e+01 -5.6065058184e+01 +2.0424035546e+02 2.1340101896e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.68689805E+02 -0 - 1 21 0.33335290E-01 0.68689805E+02 - 1 21 0.23796622E-01 0.68689805E+02 - 0.15057095E+05 - - - - 5 1 +6.6372287e+07 5.62723200e+01 7.54677100e-03 1.41417600e-01 - -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.2380787192e+02 1.2380787192e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -2.0327409785e+03 2.0327409785e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +2.3729827285e+01 -2.7343988978e+01 -1.7426108947e+03 1.7429869560e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -5.3672579255e+00 -2.5936900616e+01 -2.8286809267e+02 2.8410541727e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -2 1 1 2 0 502 -1.8362569359e+01 +5.3280889594e+01 +1.1654588079e+02 1.2945647716e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.56272325E+02 -0 - 1 21 0.31272939E+00 0.56272325E+02 - 1 -2 0.19047365E-01 0.56272325E+02 - 0.69988168E+01 - - - - 5 1 +6.6372287e+07 3.06264600e+01 7.54677100e-03 1.59126500e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +7.3143843443e+00 7.3143843443e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -2.8991776011e+02 2.8991776011e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -2.7176763448e+01 -5.9122717629e+00 
-8.2589250105e+01 8.7146518358e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +3.7114675978e+00 -2.1312114619e+01 -3.4556109190e+01 4.0768933069e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 504 +2.3465295850e+01 +2.7224386382e+01 -1.6545801647e+02 1.6931669302e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.30626458E+02 -0 - 1 21 0.11252898E-02 0.30626458E+02 - 1 21 0.44602736E-01 0.30626458E+02 - 0.12571056E+07 - - - - 5 1 +6.6372287e+07 2.39754200e+01 7.54677100e-03 1.67617000e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +3.1086457142e+00 3.1086457142e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -1.1096565876e+03 1.1096565876e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +1.8074701851e+01 -1.2471200900e+01 -4.7301063874e+01 5.2149940964e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -1.5797907864e+01 -1.5878819740e+01 -2.9957929976e+02 3.0041549170e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -2.2767939866e+00 +2.8350020640e+01 -7.5966757822e+02 7.6019980061e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.23975415E+02 -0 - 1 21 0.47825339E-03 0.23975415E+02 - 1 2 0.17071633E+00 0.23975415E+02 - 0.26724670E+06 - - - - 5 1 +6.6372287e+07 2.53344800e+01 7.54677100e-03 1.65624400e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +5.1570098369e+02 5.1570098369e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -1 -1 0 0 0 501 -0.0000000000e+00 -0.0000000000e+00 -1.8445706221e+02 1.8445706221e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 +2.4207533199e+01 -7.3828354628e+00 +5.0432335498e+02 5.0495797578e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 +8.1618412819e+00 -2.1011090789e+01 -6.9595529015e+00 2.3590611815e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 504 -3.2369374481e+01 +2.8393926251e+01 -1.6611988060e+02 1.7160945831e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 
0.25334477E+02 -0 - 1 21 0.79338612E-01 0.25334477E+02 - 1 -1 0.28378010E-01 0.25334477E+02 - 0.16118883E+03 - - - - 5 1 +6.6372287e+07 4.96978700e+01 7.54677100e-03 1.44697800e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +4.1669869678e+01 4.1669869678e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -7.2169785693e+02 7.2169785693e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -1.0137451528e+01 -1.9749989160e+01 -4.1326305480e+02 4.1385889197e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 +7.1118147579e-01 -2.6001039854e+01 -2.9419229654e+02 2.9533991805e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -1 1 1 2 0 504 +9.4262700527e+00 +4.5751029015e+01 +2.7427364092e+01 5.4168916585e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.49697867E+02 -0 - 1 21 0.11103045E+00 0.49697867E+02 - 1 -1 0.64107485E-02 0.49697867E+02 - 0.70621395E+03 - - - - 5 1 +6.6372287e+07 4.01879700e+01 7.54677100e-03 1.50683300e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +7.1802510669e+01 7.1802510669e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -7.1337629671e+01 7.1337629671e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +5.8213069507e+00 +5.5036070403e+01 -5.3681175091e+00 5.5602817785e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 505 -7.7116246928e+00 -3.4472047188e+01 -2.9891185989e+01 4.6273904012e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 +1.8903177421e+00 -2.0564023215e+01 +3.5724184496e+01 4.1263418543e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.40187974E+02 -0 - 1 21 0.11046540E-01 0.40187974E+02 - 1 21 0.10975020E-01 0.40187974E+02 - 0.42308800E+06 - - - - 5 1 +6.6372287e+07 2.09126700e+01 7.54677100e-03 1.72776500e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.1568669659e+02 2.1568669659e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 
-0.0000000000e+00 -0.0000000000e+00 -4.4598327407e+01 4.4598327407e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +1.0489102981e+01 +2.1031084145e+01 +3.3096570840e+00 2.3733554552e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 505 -2.0964401963e+01 -3.6307327249e+00 +1.9854727555e+02 1.9968402289e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 502 +1.0475298982e+01 -1.7400351420e+01 -3.0768563453e+01 3.6867446552e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.20912670E+02 -0 - 1 21 0.33182568E-01 0.20912670E+02 - 1 21 0.68612813E-02 0.20912670E+02 - 0.12384532E+06 - - - - 5 1 +6.6372287e+07 2.76487000e+01 7.54677100e-03 1.62563500e-01 - 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +8.8315219304e+00 8.8315219304e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -8.5548998670e+02 8.5548998670e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -2.6108367324e+01 +2.7834397992e+01 -5.0511958607e+01 6.3307649761e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 3 1 1 2 501 0 -1.5502416378e+00 -2.7227504772e+01 -8.3708945045e+01 8.8039353392e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -3 1 1 2 0 503 +2.7658608962e+01 -6.0689322000e-01 -7.1243756111e+02 7.1297450547e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.27648700E+02 -0 - 1 21 0.13586955E-02 0.27648700E+02 - 1 21 0.13161386E+00 0.27648700E+02 - 0.94752714E+05 - - - - 5 1 +6.6372287e+07 3.42335000e+01 7.54677100e-03 1.55551500e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +5.5344269194e+01 5.5344269194e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -5.5616791251e+02 5.5616791251e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -5.3992539344e+01 -1.9093964990e+01 -4.2937502340e+02 4.3317742845e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 +2.0534474104e+01 +1.3341922147e+01 -1.1811465484e+02 1.2062646145e+02 
0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 503 +3.3458065241e+01 +5.7520428423e+00 +4.6666034926e+01 5.7708291798e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.34233500E+02 -0 - 1 21 0.85564297E-01 0.34233500E+02 - 1 -1 0.85145026E-02 0.34233500E+02 - 0.83709286E+03 - - - - 5 1 +6.6372287e+07 3.19061800e+01 7.54677100e-03 1.57792600e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +3.0941997221e+01 3.0941997221e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.4106991043e+02 1.4106991043e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -7.0637436764e+00 -2.8412231374e+01 -9.0188811038e+01 9.4821796033e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +1.2887679806e+01 +4.9262952225e+01 -2.1945566891e+01 5.5448522603e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 503 -5.8239361293e+00 -2.0850720851e+01 +2.0064647242e+00 2.1741589011e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.31906182E+02 -0 - 1 21 0.47603074E-02 0.31906182E+02 - 1 21 0.21703063E-01 0.31906182E+02 - 0.50542998E+06 - - - - 5 1 +6.6372287e+07 3.48184100e+01 7.54677100e-03 1.55021900e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +2.1589074046e+03 2.1589074046e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.5961454167e+01 1.5961454167e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 +2.0024453480e+01 +1.8334123955e+01 +1.9389015022e+03 1.9390915796e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 -3.0260374134e+01 +1.8529083324e+01 +8.4480836704e+00 3.6474474489e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 +1.0235920654e+01 -3.6863207279e+01 +1.9559636455e+02 1.9930280466e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.34818413E+02 -0 - 1 21 0.33213937E+00 0.34818413E+02 - 1 21 0.24556100E-02 0.34818413E+02 - 0.27565033E+04 - - - - 5 1 +6.6372287e+07 3.59920600e+01 
7.54677100e-03 1.53996300e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +9.7186356638e+01 9.7186356638e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -2.5874356624e+02 2.5874356624e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -1.8438172649e+01 -2.1235714101e+01 -8.7075130972e+01 9.1504099350e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 +5.0238810426e+01 +4.6817397829e+00 -1.6209235767e+02 1.6976392189e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 504 -3.1800637777e+01 +1.6553974318e+01 +8.7610279039e+01 9.4661901640e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.35992060E+02 -0 - 1 21 0.14951747E-01 0.35992060E+02 - 1 21 0.39806702E-01 0.35992060E+02 - 0.24770639E+05 - - - - 5 1 +6.6372287e+07 4.43353000e+01 7.54677100e-03 1.47853400e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +4.1509889175e+01 4.1509889175e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.5021838758e+02 1.5021838758e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +2.1728377512e+01 -4.5038835640e+00 +4.7828765006e+00 2.2699851631e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 +3.7288465851e+01 +1.8488032542e+01 -9.7938315943e+01 1.0641499313e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 502 -5.9016843363e+01 -1.3984148978e+01 -1.5553058964e+01 6.2613431996e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.44335296E+02 -0 - 1 21 0.63861368E-02 0.44335296E+02 - 1 21 0.23110521E-01 0.44335296E+02 - 0.28976826E+06 - - - - 5 1 +6.6372287e+07 8.95697900e+01 7.54677100e-03 1.30389700e-01 - -1 -1 0 0 0 502 -0.0000000000e+00 +0.0000000000e+00 +1.0959072147e+02 1.0959072147e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -4 -1 0 0 0 501 +0.0000000000e+00 -0.0000000000e+00 -2.3941333748e+02 2.3941333748e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +4.4433031586e+01 
-8.6837769029e+00 +3.1109742872e+01 5.4931943155e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -4 1 1 2 0 502 -7.9188610214e+01 +3.3845172129e+01 -2.1114031298e+02 2.2802754971e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 503 +3.4755578628e+01 -2.5161395226e+01 +5.0207954089e+01 6.6044566085e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.89569794E+02 -0 - 1 -4 0.36832821E-01 0.89569794E+02 - 1 -1 0.16860111E-01 0.89569794E+02 - 0.65999041E+02 - - - - 5 1 +6.6372287e+07 3.30531100e+01 7.54677100e-03 1.56660200e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +7.4577216351e+00 7.4577216351e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -2.9293771215e+03 2.9293771215e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -8.0432547113e+00 +3.1734139686e+01 -3.2819056421e+01 4.6355582537e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 -6.2607331878e+00 +2.0255158476e+01 -2.7252772679e+02 2.7335111139e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 +1.4303987899e+01 -5.1989298162e+01 -2.6165726167e+03 2.6171281493e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.33053112E+02 -0 - 1 21 0.11473423E-02 0.33053112E+02 - 1 2 0.45067321E+00 0.33053112E+02 - 0.95448503E+04 - - - - 5 1 +6.6372287e+07 2.64822300e+01 7.54677100e-03 1.64058200e-01 - 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +2.0433495267e+02 2.0433495267e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -1.1621962120e+01 1.1621962120e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -6.1453446806e-01 +2.8655724578e+01 +2.6926488695e+01 3.9326377880e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +1.9294771642e+01 -8.5428375362e+00 +2.2933975834e+01 3.1164651987e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -1.8680237174e+01 -2.0112887042e+01 +1.4285252602e+02 1.4546588493e+02 0.0000000000e+00 0.0000e+00 
-1.0000e+00 - - 3 0.26482232E+02 -0 - 1 21 0.17879943E-02 0.26482232E+02 - 1 2 0.31436144E-01 0.26482232E+02 - 0.22563568E+06 - - - - 5 1 +6.6372287e+07 2.94114000e+01 7.54677100e-03 1.60468700e-01 - 1 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +7.1456949129e+01 7.1456949129e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 502 0 +0.0000000000e+00 -0.0000000000e+00 -3.2341513368e+01 3.2341513368e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -1.2633702387e+01 +1.6779116547e+01 +3.5382422326e+01 4.1146871057e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 +3.4850371802e+01 -4.6645490224e-01 +1.2205098572e-01 3.4853706995e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 502 0 -2.2216669414e+01 -1.6312661644e+01 +3.6109624495e+00 2.7797884444e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.29411403E+02 -0 - 1 2 0.49756174E-02 0.29411403E+02 - 1 1 0.10993377E-01 0.29411403E+02 - 0.84192782E+04 - - - - 5 1 +6.6372287e+07 4.12907800e+01 7.54677100e-03 1.49892200e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.6408792035e+01 1.6408792035e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -2.8426721197e+02 2.8426721197e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -9.0227533210e+00 +4.0120314210e+01 -1.6777925940e+02 1.7274527367e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 -2.8940986847e+01 -8.3287328478e+00 -3.5522385380e+01 4.6570252025e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 +3.7963740168e+01 -3.1791581362e+01 -6.4556775153e+01 8.1360478312e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.41290780E+02 -0 - 1 21 0.25244298E-02 0.41290780E+02 - 1 21 0.43733413E-01 0.41290780E+02 - 0.38789728E+06 - - - - 5 1 +6.6372287e+07 3.01075800e+01 7.54677100e-03 1.59690300e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.0837614737e+02 1.0837614737e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 -1 0 0 503 0 
+0.0000000000e+00 -0.0000000000e+00 -5.0187655412e+02 5.0187655412e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -2.4244911852e+01 +2.4839805338e+00 -4.4874897660e+01 5.1066058689e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 1 1 2 502 0 +9.4171551279e+00 +2.3796264682e+01 -4.5137403450e+02 4.5209895382e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -1 1 1 2 0 502 +1.4827756724e+01 -2.6280245215e+01 +1.0274852541e+02 1.0708768898e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.30107583E+02 -0 - 1 1 0.77211778E-01 0.30107583E+02 - 1 -1 0.16673253E-01 0.30107583E+02 - 0.11578566E+03 - - - - 5 1 +6.6372287e+07 2.75831300e+01 7.54677100e-03 1.62645100e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +1.5119270639e+01 1.5119270639e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 503 -0.0000000000e+00 -0.0000000000e+00 -4.4797372094e+02 4.4797372094e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +2.3933439080e+01 -1.5349180970e+01 -7.6519214979e+00 2.9444163518e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 -9.1468995899e+00 +2.8848403826e+01 -5.3965016364e+01 6.1871796213e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 503 -1.4786539490e+01 -1.3499222856e+01 -3.7123751244e+02 3.7177703185e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.27583132E+02 -0 - 1 21 0.23260418E-02 0.27583132E+02 - 1 21 0.68919030E-01 0.27583132E+02 - 0.17059051E+06 - - - - 5 1 +6.6372287e+07 5.38232700e+01 7.54677100e-03 1.42574700e-01 - 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +8.6931327192e+02 8.6931327192e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.3878826862e+01 1.3878826862e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -1.1384767832e+01 -5.5414417532e+01 +5.7810173998e+02 5.8086314427e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 503 +4.8586431572e+01 +3.3136350907e+01 +2.3303180192e+02 2.4033826119e+02 
0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 504 -3.7201663740e+01 +2.2278066625e+01 +4.4300903156e+01 6.1990693318e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.53823273E+02 -0 - 1 21 0.13374046E+00 0.53823273E+02 - 1 21 0.21352048E-02 0.53823273E+02 - 0.45957317E+05 - - - - 5 1 +6.6372287e+07 4.09907500e+01 7.54677100e-03 1.50104500e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +7.0992522075e+01 7.0992522075e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.3949703176e+02 1.3949703176e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +1.1659851347e+01 -5.1077643128e+01 +4.3516059870e+01 6.8106719401e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +3.0482205563e+00 +2.8158917183e+01 -1.2900096167e+00 2.8352784524e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 503 -1.4708071904e+01 +2.2918725946e+01 -1.1073055994e+02 1.1403004991e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.40990751E+02 -0 - 1 21 0.10921927E-01 0.40990751E+02 - 1 21 0.21461082E-01 0.40990751E+02 - 0.13404950E+06 - - - - 5 1 +6.6372287e+07 3.36149800e+01 7.54677100e-03 1.56125500e-01 - 21 -1 0 0 502 503 +0.0000000000e+00 +0.0000000000e+00 +7.9720643936e+01 7.9720643936e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.1015809106e+02 1.1015809106e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 +2.0832938167e+01 +2.8798086026e+01 -8.7908167480e+01 9.4821869741e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 4 1 1 2 502 0 -3.2596791056e+01 +4.0076403627e+00 +4.9338106023e+01 5.9269390704e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - -4 1 1 2 0 501 +1.1763852889e+01 -3.2805726389e+01 +8.1326143350e+00 3.5787474550e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.33614975E+02 -0 - 1 21 0.12264714E-01 0.33614975E+02 - 1 1 0.16947399E-01 0.33614975E+02 - 0.16891392E+05 - - - - 5 1 +6.6372287e+07 5.25551000e+01 7.54677100e-03 
1.43202800e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +4.5821819927e+02 4.5821819927e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -1.2383942127e+01 1.2383942127e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -5.0926951541e+01 -5.0815690566e+00 +5.0883321493e+01 7.2169863125e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 +2.4440122995e+01 +3.5331599133e+00 +1.4625618020e+02 1.4832623867e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 504 +2.6486828546e+01 +1.5484091433e+00 +2.4869475544e+02 2.5010603960e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.52555102E+02 -0 - 1 21 0.19052222E-02 0.52555102E+02 - 1 -1 0.70495095E-01 0.52555102E+02 - 0.23587364E+05 - - - - 5 1 +6.6372287e+07 4.46278200e+01 7.54677100e-03 1.47667900e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +5.5593250322e+02 5.5593250322e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.8836808285e+02 1.8836808285e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +4.0713374762e+01 +1.7975906273e+01 +5.4492854609e+02 5.4674293085e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 -4.8243013951e+01 -3.8385710795e+01 -1.0265016479e+02 1.1974100183e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 +7.5296391891e+00 +2.0409804522e+01 -7.4713960932e+01 7.7816653392e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.44627825E+02 -0 - 1 21 0.85528077E-01 0.44627825E+02 - 1 21 0.28979705E-01 0.44627825E+02 - 0.15745256E+04 - - - - 5 1 +6.6372287e+07 3.37382900e+01 7.54677100e-03 1.56009900e-01 - 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +1.5881756914e+03 1.5881756914e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -1.3626711614e+01 1.3626711614e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -2.5839259073e+01 -9.7303516134e+00 
+1.6154599506e+02 1.6388854618e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -1.7069497306e+01 +2.9008026152e+01 +1.1420766537e+01 3.5542470761e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 +4.2908756379e+01 -1.9277674538e+01 +1.4015822182e+03 1.4023713861e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.33738290E+02 -0 - 1 21 0.20964184E-02 0.33738290E+02 - 1 2 0.24433458E+00 0.33738290E+02 - 0.17276941E+05 - - - - 5 1 +6.6372287e+07 3.21182600e+01 7.54677100e-03 1.57578900e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +4.7751679366e+02 4.7751679366e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -2.2525410541e+01 2.2525410541e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -1.0544572028e+01 +2.8771952473e+01 +1.5170782840e+02 1.5477169782e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +2.6106310540e+01 +5.1888225486e+00 -1.0981232855e+01 2.8793242340e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 504 -1.5561738512e+01 -3.3960775022e+01 +3.1426478757e+02 3.1647726404e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.32118265E+02 -0 - 1 21 0.73464115E-01 0.32118265E+02 - 1 21 0.34654481E-02 0.32118265E+02 - 0.80826114E+05 - - - - 5 1 +6.6372287e+07 4.09504400e+01 7.54677100e-03 1.50133100e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +1.7536299147e+02 1.7536299147e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.6964015600e+01 3.6964015600e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +4.0714726769e+00 +5.7850454719e+01 +1.0890596398e+00 5.8003776186e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +4.7732900088e+00 -1.9654732976e+01 +1.3161726099e+01 2.4131387440e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 504 -8.8447626857e+00 -3.8195721743e+01 +1.2414819013e+02 1.3019184344e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 
- - 3 0.40950439E+02 -0 - 1 21 0.26978921E-01 0.40950439E+02 - 1 21 0.56867717E-02 0.40950439E+02 - 0.26246574E+06 - - - - 5 1 +6.6372287e+07 2.73967700e+01 7.54677100e-03 1.62878500e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +9.4621731867e+02 9.4621731867e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -5.2344682715e+02 5.2344682715e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +3.5097651064e+01 +5.1743549524e+00 +5.7624905467e+01 6.7670146960e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 -1.6769759782e+01 -2.1740470328e+01 +8.8306196865e+02 8.8348871717e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 502 -1.8327891283e+01 +1.6566115375e+01 -5.1791638260e+02 5.1850528169e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.27396771E+02 -0 - 1 21 0.14557190E+00 0.27396771E+02 - 1 21 0.80530281E-01 0.27396771E+02 - 0.67303146E+02 - - - - 5 1 +6.6372287e+07 8.17190300e+01 7.54677100e-03 1.32424300e-01 - 1 -1 0 0 502 0 +0.0000000000e+00 +0.0000000000e+00 +3.6186655864e+02 3.6186655864e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -8.5536214190e+01 8.5536214190e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +6.9748665717e+01 -2.1225395602e+01 +1.1519982450e+02 1.3633192345e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 +1.4237054539e+01 +2.7062837406e+01 +2.1101094416e+02 2.1321517170e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 502 0 -8.3985720256e+01 -5.8374418043e+00 -4.9880424207e+01 9.7855677673e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.81719032E+02 -0 - 1 1 0.55671777E-01 0.81719032E+02 - 1 1 0.13159418E-01 0.81719032E+02 - 0.34676915E+03 - - - - 5 1 +6.6372287e+07 3.21808800e+01 7.54677100e-03 1.57516200e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.6785736656e+03 1.6785736656e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 
+0.0000000000e+00 -0.0000000000e+00 -2.6055706089e+00 2.6055706089e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 -1.3779710329e+01 -3.5600343603e+01 +9.7923046116e+02 9.7997426545e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 +2.3134215327e+01 +6.9265799703e+00 +7.6205484473e+01 7.9940260774e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 504 -9.3545049987e+00 +2.8673763633e+01 +6.2053214934e+02 6.2126470997e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.32180882E+02 -0 - 1 21 0.40085836E-03 0.32180882E+02 - 1 -1 0.25824124E+00 0.32180882E+02 - 0.94656396E+04 - - - - 5 1 +6.6372287e+07 2.72410900e+01 7.54677100e-03 1.63075300e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +4.0924620761e+01 4.0924620761e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.0688357873e+02 1.0688357873e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +1.4260677542e+01 +2.3044763190e+01 +2.6464656953e+01 3.7878834487e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 -3.4418262802e+01 -1.1596463544e+01 -5.7672141314e+01 6.8155488882e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 +2.0157585259e+01 -1.1448299646e+01 -3.4751473609e+01 4.1773876122e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.27241089E+02 -0 - 1 21 0.62960956E-02 0.27241089E+02 - 1 21 0.16443627E-01 0.27241089E+02 - 0.51232088E+06 - - - - 5 1 +6.6372287e+07 8.96432400e+01 7.54677100e-03 1.30371800e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.9106265508e+03 1.9106265508e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -8.3144018617e+00 8.3144018617e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 +2.0112949282e+01 -5.3085600688e+01 +9.4969449400e+02 9.5138963820e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 -4.7004645833e+00 -3.1846193563e+01 +6.9718679537e+02 
6.9792958244e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 -1.5412484699e+01 +8.4931794252e+01 +2.5543085956e+02 2.6962173201e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.89643239E+02 -0 - 1 21 0.29394220E+00 0.89643239E+02 - 1 21 0.12791403E-02 0.89643239E+02 - 0.10444343E+05 - - - - 5 1 +6.6372287e+07 3.51788400e+01 7.54677100e-03 1.54701800e-01 - 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +2.4477402762e+01 2.4477402762e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - -4 -1 0 0 0 501 -0.0000000000e+00 -0.0000000000e+00 -1.3548028651e+02 1.3548028651e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +5.2535145793e+00 -1.9356004202e+01 -7.2383362573e+01 7.5110621695e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 -1.7320229400e+01 -2.4467895105e+01 -1.3062058210e+01 3.2699932753e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - -4 1 1 2 0 502 +1.2066714821e+01 +4.3823899307e+01 -2.5557462961e+01 5.2147134821e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.35178836E+02 -0 - 1 21 0.37657543E-02 0.35178836E+02 - 1 -4 0.20843121E-01 0.35178836E+02 - 0.26617371E+05 - - - - 5 1 +6.6372287e+07 5.53124500e+01 7.54677100e-03 1.41862700e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.4660412160e+02 2.4660412160e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -7.8448765078e+02 7.8448765078e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 505 -4.2504266063e+01 -2.5589506438e+01 -2.1846702105e+02 2.2402962920e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 +6.8994906150e+01 -2.2979517961e+01 -5.5770982325e+02 5.6243097556e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 -2.6490640086e+01 +4.8569024399e+01 +2.3829331512e+02 2.4463116762e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.55312446E+02 -0 - 1 21 0.37939096E-01 0.55312446E+02 - 1 21 0.12069041E+00 0.55312446E+02 - 0.42114463E+03 - - - - 5 1 +6.6372287e+07 
3.03757500e+01 7.54677100e-03 1.59397200e-01 - 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +1.2971462251e+03 1.2971462251e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -5.6448232432e+00 5.6448232432e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 -2.3055791365e+00 +2.0675048888e+01 +2.1686626098e+02 2.1786176464e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -2.9371916466e+01 -8.2619650311e+00 +4.2957332697e+01 5.2690625120e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 501 0 +3.1677495603e+01 -1.2413083857e+01 +1.0316778082e+03 1.0322386586e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.30375748E+02 -0 - 1 21 0.86843538E-03 0.30375748E+02 - 1 2 0.19956072E+00 0.30375748E+02 - 0.91540288E+05 - - - - 5 1 +6.6372287e+07 4.60134200e+01 7.54677100e-03 1.46811200e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.7829784629e+01 1.7829784629e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -2.8514522110e+02 2.8514522110e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -3.0860485485e+01 +3.4588417908e+01 -6.7479109992e+01 8.1866711813e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 +2.6117598747e+01 +1.5550519447e+01 -1.3824680405e+02 1.4154902490e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -1 1 1 2 0 504 +4.7428867379e+00 -5.0138937355e+01 -6.1589522426e+01 7.9559269015e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.46013420E+02 -0 - 1 21 0.43868495E-01 0.46013420E+02 - 1 -1 0.27430438E-02 0.46013420E+02 - 0.16895399E+05 - - -
From d041b3580ee6d00b1835f3347795bb3ca73f6f6a Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Wed, 11 Oct 2023 16:08:52 +0200 Subject: [PATCH 09/76] added rex files --- tools/REX/REX.hpp | 2459 +++++++++++++++++++++++++++++++++++++++++ tools/REX/pepper.cu | 169 +++ tools/REX/teawREX.hpp | 470 ++++++++ 3 files changed, 3098 insertions(+) create mode 100644 tools/REX/REX.hpp create mode 100644 tools/REX/pepper.cu create mode 100644 tools/REX/teawREX.hpp diff --git a/tools/REX/REX.hpp b/tools/REX/REX.hpp new file mode 100644 index 0000000000..703f799d95 --- /dev/null +++ b/tools/REX/REX.hpp @@ -0,0 +1,2459 @@ +/*** + * ______ _______ __ + * | ___ \ ___\ \ / / + * | |_/ / |__ \ V / + * | /| __| / \ + * | |\ \| |___/ /^\ \ + * \_| \_\____/\/ \/ + * + ***/ + +// THIS IS NOT A LICENSED RELEASE +// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD +// FROM AN IMPROPER RELEASE. + +// Copyright © 2023 CERN, CERN Author Zenny Wettersten. +// All rights reserved. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// ZW: all fcns within the REX standard sit in the +// namespace REX +// Note that as a convention, std::string_view objects will be +// referred to as strings unless the difference is relevant +namespace REX +{ + #pragma warning( push ) + #pragma warning( disable : 4101) + static const size_t npos = -1; + #pragma warning( pop ) + // ZW: minimal fcn for counting the amount of times + // a given search term appears in a string + int nuStrCount( std::string_view searchString, std::string_view searchTerm ) + { + int count = 0; + size_t pos = 0; + while((pos = searchString.find(searchTerm, pos)) != npos ){ + ++count; + ++pos; + } + return count; + } + + // ZW: fcn for finding the location of each + // entry of seachTerm in the given string textFile + // Pre-allocates vector memory using nuStrCount + std::shared_ptr> nuFindEach( std::string_view textFile, std::string_view searchTerm 
) + { + auto eachPos = std::make_shared>(); + eachPos->reserve( nuStrCount(textFile, searchTerm) ); + eachPos->push_back( textFile.find( searchTerm ) ); + size_t currPos = textFile.find( searchTerm, eachPos->at(0) + 1 ); + while( currPos != npos ) + { + eachPos->push_back( currPos ); + currPos = textFile.find( searchTerm, currPos + 1 ); + } + return eachPos; + } + + // ZW: fcn for splitting a string into a vector of strings, + // each element differentiated by linebreaks in the original string + // Removes sequential linebreaks, ie "\n\n\n" would + // only result in a single element separation + std::shared_ptr> nuLineSplitter( std::string_view currEvt ) + { + auto lineBreaks = nuFindEach( currEvt, "\n" ); + std::vector trueBreaks; + trueBreaks.reserve( lineBreaks->size() ); + for( int k = 0 ; k < lineBreaks->size() - 1 ; ++k ) + { + if( int( (*lineBreaks)[k+1] - (*lineBreaks)[k]) == 1){continue;} + trueBreaks.push_back( (*lineBreaks)[k] ); + } + auto splitLines = std::make_shared>(); + splitLines->reserve( trueBreaks.size() ); + size_t startPos = 0; + for( auto k : trueBreaks ) + { + splitLines->push_back( currEvt.substr( startPos + 1, k - startPos - 1) ); + startPos = k; + } + if( auto strung = currEvt.substr( startPos ).size() > 1 ){ splitLines->push_back( currEvt.substr( startPos ) ); } + return splitLines; + } + + // ZW: fcn for finding each linebreak in a string, + // returning a vector of the positions of "\n" characters + // Ignores sequential linebreaks, ie would only return { } + // for the string "\n\n\n\n" + std::shared_ptr> lineFinder( std::string_view currEvt, size_t startPos = 0, size_t endPos = npos ) + { + auto lineBreaks = nuFindEach( currEvt.substr( startPos, endPos - startPos), "\n" ); + auto truBreaks = std::make_shared>(); + truBreaks->reserve( lineBreaks->size() ); + for( int k = 0 ; k < lineBreaks->size() ; ++k ) + { + if( int( (*lineBreaks)[k+1] - (*lineBreaks)[k]) == 1){continue;} + truBreaks->push_back( (*lineBreaks)[k] ); + } + return 
truBreaks; + } + + // ZW: fcn for splitting a string into a vector of strings, + // each element separated by blankspace (" ") in the original string + // Ignores sequential blankspaces, as well as linebreaks + // ie "hello \n\n\n world" would return {"hello", "world"} + // Does not ignore linebreaks that are not separated from words + // by anything other than blankspace, + // ie "hello \n\n\nworld \n\n" would return {"hello", "\n\nworld"} + std::shared_ptr> nuWordSplitter( std::string_view currEvt ) + { + std::vector noSpace; + size_t nuStart = currEvt.find_first_not_of( " " ); + size_t nuEnd = currEvt.find(" ", nuStart+1 ); + auto splitWords = std::make_shared>(); + splitWords->reserve(13); + while( nuStart != npos ) + { + std::string_view word = currEvt.substr( nuStart, nuEnd - nuStart ); + if( word == "" || word == "\n" || word == " " ){ + nuStart = currEvt.find_first_not_of(" ", nuEnd); + nuEnd = currEvt.find( " ", nuStart + 1); + continue; } + splitWords->push_back( currEvt.substr( nuStart, nuEnd - nuStart ) ); + nuStart = currEvt.find_first_not_of(" ", nuEnd); + nuEnd = currEvt.find( " ", nuStart + 1); + } + return splitWords; + } + + // ZW: fcn for splitting a string into a vector of strings, + // elements separated by any form of blankspace in the original string + // Ignores sequential blankspaces of all forms + std::shared_ptr> nuBlankSplitter( std::string_view currEvt ) + { + auto lines = nuLineSplitter( currEvt ); + auto splitString = std::make_shared>(); + splitString->reserve( lines->size() * lines->at(0).size() ); + for( auto line : *lines ) + { + auto words = nuWordSplitter(line); + for( auto word : *words ) + { + if( word == "" || word == "\n" || word == " " ){continue;} + splitString->push_back( word ); + } + } + return splitString; + } + + // ZW: templated fcn for comparing two + // string-like objects, ignoring cases + template + bool clStringComp( const Str1& org, const Str2& comp ){ + return std::equal( org.begin(), org.end(), comp.begin(), 
comp.end(), + []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + } + template + bool clStringComp( const Str1Pt& orgStrt, const Str1Pt& orgEnd, const Str2& comp ){ + return std::equal( orgStrt, orgEnd, comp.begin(), comp.end(), + []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + } + + // ZW: templated fcn for finding a caseless substring searchTerm in srcFile + // On failure to find searchTerm, returns REX::npos + template + size_t clStringFind( const Str1& srcFile, const Str2& searchTerm, size_t strtPt = 0 ){ + size_t strLen = searchTerm.size(); + if( srcFile.size() == 0 || srcFile.size() < strLen ){ return npos; } + for( size_t k = strtPt ; k < srcFile.size() - strLen; ++k ) + { + if( clStringComp( srcFile.substr(k, strLen), searchTerm ) ){ return k; } + } + return npos; + } + + // ZW: templated fcn for finding a caseless substring searchTerm of srcFile + // fulfilling a particular predicate cond( size_t, string ) + template + size_t clStringFindIf( const Str1& srcFile, const Str2& searchTerm, std::function& cond, size_t strtPt = 0 ) + { + auto currPt = clStringFind( srcFile, searchTerm, strtPt ); + bool condStat = cond( currPt, srcFile ); + while( !( condStat ) && currPt != npos) + { + currPt = clStringFind( srcFile, searchTerm, currPt + 1 ); + condStat = cond( currPt, srcFile ); + } + return currPt; + } + + // ZW: templated fcn for counting the number of occurances of + // caseless substring searchTerm in string-like object srcFile + template + int clStrCount( Str1 srcFile, Str2 searchTerm ) + { + int count = 0; + size_t pos = 0; + while((pos = clStringFind( srcFile, searchTerm, pos ) ) != npos ){ + ++count; + ++pos; + } + return count; + } + + // ZW: templated fcn for finding each instance of + // of substring searchTerm of string-like object srcFile + template + std::shared_ptr> clFindEach( Str1 srcFile, Str2 searchTerm ) + { + auto eachPos = std::make_shared>(); + auto nos = clStrCount(srcFile, 
searchTerm); + if( nos == 0 ){ return eachPos; } + eachPos->reserve( nos ); + eachPos->push_back( clStringFind( srcFile, searchTerm ) ); + size_t currPos = clStringFind( srcFile, searchTerm, eachPos->at(0) + 1); + while( currPos != npos ) + { + eachPos->push_back( currPos ); + currPos = clStringFind( srcFile, searchTerm, currPos + 1 ); + } + return eachPos; + } + + // ZW: fcn for finding left angle bracket + // indicating the start of a new node in an XML file + std::shared_ptr nodeStartFind( std::string_view parseFile, size_t strtPos ) + { + auto retPtr = std::make_shared(parseFile.find("<", strtPos)); + while( parseFile[*retPtr + 1] == '!' || parseFile[*retPtr +1] == '/' || parseFile[*retPtr +1] == '?' ){ + *retPtr = parseFile.find("<", *retPtr +1); + } + return retPtr; + } + + // ZW: fcn for finding left angle bracket + // indicating an end of a node in an XML file + std::shared_ptr nodeEndFind( std::string_view parseFile, size_t strtPos ) + { + auto retPtr = std::make_shared(parseFile.find("<", strtPos)); + while( parseFile[*retPtr + 1] != '/' ){ + *retPtr = parseFile.find("<", *retPtr +1); + } + return retPtr; + } + + // ZW: struct for handling tags in XML node opening tags + struct xmlTag { + public: + void setVal( std::string_view valSet ){ modded = true; val = valSet; } + void setId( std::string_view idSet ){ modded = true; id = idSet; } + std::string_view getVal(){ return val; } + std::string_view getId(){ return id; } + bool isModded(){ return modded; } + xmlTag(){ modded = false; return; } + xmlTag( xmlTag& oldTag ){ + modded = false; val = oldTag.getVal(); id = oldTag.getId(); + } + xmlTag( std::string_view initId, std::string_view initVal){ + modded = false; val = initVal; id = initId; + } + protected: + bool modded; + std::string_view val; + std::string_view id; + }; + + // ZW: function for parsing XML opening + // tags and returning the next header tag + std::shared_ptr xmlTagParser( std::string_view tagLine, size_t& equPt ) + { + auto tagBreaker = 
// NOTE(review): throughout this span, template arguments and "</...>"-style
// string literals were destroyed by angle-bracket stripping during extraction;
// they are reconstructed from context (and the surviving fragments) -- confirm
// every reconstructed token against upstream REX.hpp before relying on it.
// (tail of xmlTagParser -- its signature sits in the preceding span)
tagLine.find_first_not_of(" ", equPt+1); // ZW: need to determine what type of quotation marks are used
        auto tagEnder = tagLine.find( tagLine[tagBreaker], tagBreaker+1); // matching closing quote
        auto attrEnd = tagLine.find_last_not_of(" ", equPt - 1) ; // last char of the attribute name
        auto attrStart = tagLine.find_last_of(" ", attrEnd) + 1;  // first char of the attribute name
        // assemble the tag from the (name, value) substrings
        auto tagPtr = std::make_shared<xmlTag>(tagLine.substr(attrStart, attrEnd - attrStart + 1), tagLine.substr(tagBreaker + 1, tagEnder - tagBreaker - 1));
        equPt = tagLine.find("=", equPt + 1); // ZW: modifies input equPt to point to the next equality sign in tagLine
        return tagPtr;
    }

    // ZW: struct for handling nodes in generic XML files
    // Nodes hold non-owning string_views into the parsed document (xmlFile),
    // so the source buffer must outlive the node tree. Writing back out is
    // lazy: writtenSelf is only rebuilt when the node (or, for deep checks,
    // any child) has been modified.
    struct xmlNode {
    public:
        xmlNode(){ modded = false; return; }
        // Construct a node viewing originFile, starting at (or after) begin;
        // locates the node name and, for non-self-closing nodes, its content.
        xmlNode( const std::string_view originFile, const size_t& begin = 0, const std::vector<std::shared_ptr<xmlNode>>& childs = {} ){
            modded = false; xmlFile = originFile; start = begin; children = childs;
            if( xmlFile.substr(start, 1) != "<" ){ start = *nodeStartFind( xmlFile, size_t(start) ); }
            size_t trueStart = xmlFile.find_first_not_of(" ", start+1);
            name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ ", trueStart) - trueStart );
            // if ">" precedes "/", the node is not self-closing: grab content
            if( xmlFile.find( ">", trueStart ) < xmlFile.find( "/", trueStart ) ){
                content = xmlFile.substr( xmlFile.find( ">", trueStart ) + 1, xmlFile.find( "</", trueStart ) - xmlFile.find( ">", trueStart ) - 1 );
            }
        }
        std::vector<std::shared_ptr<xmlNode>> getChildren(){ return children; }
        std::vector<std::shared_ptr<xmlTag>> getTags(){ return tags; }
        std::string_view getFile(){ return xmlFile; }
        std::string_view getName(){ return name; }
        std::string_view getContent(){ return content; }
        size_t getStart(){ return start; }
        size_t getEnd(){ return end; }
        virtual bool isModded(){ return modded; }
        // deep variant also polls all children recursively
        virtual bool isModded( bool deep ){
            bool modStat = isModded();
            if( !deep ){ return modStat; }
            for( auto child : children ){ modStat = (modStat || child->isModded( deep )); }
            return modStat;
        }
        bool isWritten(){ return written; }
        bool isParsed(){ return parsed; }
        void setModded( bool mod ){ modded = mod; }
        bool deepModded(){ return deepMod; }
        bool deepParse(){ return deepParsed; }
        // parse this node (and, if recursive, the whole subtree)
        void parser( bool recursive ){
            parsed = parse( recursive );
        }
        void addChild( std::shared_ptr<xmlNode> child ){ modded = true; children.push_back(child); }
        void addTag( std::shared_ptr<xmlTag> tag ){ modded = true; tags.push_back(tag); }
        void setFile( std::string_view file ){ modded = true; xmlFile = file; }
        void setName( std::string_view newName ){ modded = true; name = newName; }
        void setCont( std::string_view cont ){ modded = true; content = cont; }
    protected:
        // parse header tags and content of this node only
        virtual bool parse(){
            auto topStat = parseTop();
            auto contStat = parseContent();
            return ( topStat && contStat );
        }
        virtual bool parse( bool recurs )
        {
            bool parseSt = parse();
            if( !recurs ){ return parseSt; }
            bool childSt = parseChildren( recurs );
            deepMod = true;
            return (parseSt && childSt );
        }
        // extract all attribute tags from the opening tag; xmlTagParser
        // advances eqSgn to the next "=" each call, terminating the loop
        bool parseTop(){
            if( xmlFile == "" ){ return false; }
            size_t eqSgn = xmlFile.find( "=", start ); size_t nodeInitEnd = xmlFile.find( ">", start );
            while( eqSgn < nodeInitEnd ){ tags.push_back( xmlTagParser( xmlFile, eqSgn ) ); }
            return true;
        }
        // locate this node's content and end, collecting direct children by
        // alternately advancing start-of-node / end-of-node cursors until the
        // counts balance (order of these finds is load-bearing -- do not reorder)
        virtual bool parseContent(){
            if( xmlFile == "" ){ return false; }
            auto firstR = xmlFile.find_first_of( ">/", start );
            auto nodeStrEnd = xmlFile.find(">", firstR);
            // self-closing node: "/" seen before the closing ">"
            if( firstR < nodeStrEnd ){ content = ""; end = nodeStrEnd + 2; parsed = true; return true; }
            auto endNode = *nodeEndFind( xmlFile, start );
            auto startNode = *nodeStartFind( xmlFile, start + 1 );
            // no nested node before our closing tag: leaf with plain content
            if( startNode > endNode ){end = xmlFile.find( ">", endNode ) + 1; content = xmlFile.substr( xmlFile.find( ">", start ) + 1, endNode - xmlFile.find( ">", start ) - 1 ); return true; }
            // NOTE(review): reconstructed from mangled text; the unanchored
            // xmlFile.find(">") below (no start offset) looks suspicious but
            // matches the surviving fragment -- confirm upstream
            auto endPt = xmlFile.find( std::string("</") + std::string(name), start );
            content = xmlFile.substr( xmlFile.find(">", start) + 1, startNode - xmlFile.find(">") - 1 );
            end = xmlFile.find( ">", endPt ) + 2;
            while( startNode < endNode ){
                auto nextNode = std::make_shared<xmlNode>( xmlFile, startNode );
                children.push_back( nextNode );
                int starts = 0;
                // skip over all node openings belonging to this child...
                while( startNode < endNode )
                {
                    startNode = *nodeStartFind( xmlFile, startNode + 1 );
                    ++starts;
                }
                // ...and the matching number of node endings
                for( int k = 0 ; k < starts ; ++k ){ endNode = *nodeEndFind( xmlFile, endNode + 1 ); }
                if( endNode > end ){ break; }
            }
            return true;
        }
        // parse every child, optionally recursively
        bool parseChildren( bool recursive ){
            bool status = true;
            if( recursive ){
                for( auto child : children )
                {
                    status = (status && child->parse( true ));
                    deepParsed = true;
                }
            } else {
                for( auto child : children )
                {
                    status = (status && child->parse());
                    deepParsed = true;
                }
            }
            return status;
        }
        std::shared_ptr<std::string> writtenSelf; // cached serialised form
        bool deepMod = false;
        std::vector<std::shared_ptr<xmlNode>> children;
        std::vector<std::shared_ptr<xmlTag>> tags;
        std::string_view xmlFile;  // the document this node views into
        std::string_view name;
        std::string_view content;  // text between opening and closing tags
        size_t start;              // offset of "<" opening this node
        size_t end = npos;         // offset just past the closing tag
        bool modded = false;
        bool written = false;
        bool parsed = false;
        bool deepParsed = false;
        std::string nodeHeader;    // staging buffers for serialisation
        std::string nodeContent;
        std::string nodeEnd;
        // serialise the opening tag including all attributes
        virtual void headWriter() {
            nodeHeader = "<" + std::string(name) ;
            for( auto tag : tags ){
                nodeHeader += " " + std::string(tag->getId()) + "=\"" + std::string(tag->getVal()) + "\"";
            }
            nodeHeader += ">";
        }
        virtual void endWriter() {
            nodeEnd = "</" + std::string(name) + ">\n";
        }
        // own content only up to the first child (children serialise themselves)
        virtual void contWriter() {
            if( children.size() > 0 ){
                nodeContent = std::string(content.substr(0, children[0]->start - 1 ));
            } else {
                nodeContent = std::string(content);
            }
        }
        virtual void childWriter() {
            for(auto child : children){
                nodeContent += (*child->nodeWriter());
            }
        }
        // locate end for the unmodified fast path (copy straight from source)
        virtual void endFinder(){
            auto headEnd = xmlFile.find(">", start);
            auto slashPos = xmlFile.find("/", start);
            if( headEnd > slashPos ){ end = headEnd; }
            else{ end = xmlFile.find( ">", xmlFile.find( "</", start ) ); }
            if( end == npos ){ end = xmlFile.size(); return; }
            end += 2;
        }
        // rebuild writtenSelf: from parts if modified, else verbatim from source
        virtual void fullWriter(){
            if( isModded() ){
                headWriter();
                contWriter();
                childWriter();
                endWriter();
                writtenSelf = std::make_shared<std::string>( nodeHeader + nodeContent + nodeEnd );
                written = true;
                modded = false;
            } else if( !isWritten() ){
                endFinder();
                if( start > xmlFile.size() ){ start = 0; }
                writtenSelf = std::make_shared<std::string>( xmlFile.substr( start, end - start ) );
                written = true;
            }
        }
    public:
        // count all descendants, discounting children with unset end
        virtual void childCounter( int& noChilds )
        {
            for( auto child : children )
            {
                child->childCounter( noChilds );
                if( child->end == 0 ){ --noChilds; }
            }
            noChilds += children.size();
        }
        // entry point for serialisation; rewrites lazily
        virtual std::shared_ptr<std::string> nodeWriter() {
            if( isModded( true ) || !isWritten() ){ fullWriter(); }
            return writtenSelf;
        }
    };

    // ZW: function for large scale parsing of XML files
    // sequentially goes through the document and
    // recursively calls itself while the next node
    // beginning is closer than the next node ending
    // Both cursors are advanced in place (passed by reference).
    std::shared_ptr<xmlNode> xmlPtrParser( std::string_view parseFile, size_t& initPos, size_t& endPos )
    {
        auto currNode = std::make_shared<xmlNode>(parseFile, initPos);
        size_t equalSign = parseFile.find("=", initPos);
        size_t nodeInitEnd = parseFile.find(">", initPos);
        initPos = *nodeStartFind( parseFile, initPos + 1 );
        // collect attribute tags (xmlTagParser advances equalSign each call)
        while( equalSign < nodeInitEnd ){
            currNode->addTag( xmlTagParser(parseFile, equalSign) );
        }
        // recurse for every node opening before endPos
        while( initPos < endPos )
        {
            currNode->addChild(xmlPtrParser( parseFile, initPos, endPos ));
        }

        initPos = *nodeStartFind( parseFile, endPos );
        endPos = *nodeEndFind( parseFile, endPos + 1 );
        return currNode;
    }

    // ZW: struct for handling rwgt parameter sets
    // in the LHE header initrwgt node
    // A single <weight id="..."> entry; the id attribute is split into a
    // textual tag (idTag) and a numeric index (id).
    struct headWeight : xmlNode {
    public:
        int getId(){ return id; }
        std::string_view getTag(){ return idTag; }
        bool hasTag(){ return (idTag.size() > 0); }
        headWeight(){ name = "weight"; return; }
        headWeight( std::string_view paramSet, const size_t& begin = 0 ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; return; }
        headWeight( std::string_view paramSet, std::string_view idText, int idNo, const size_t& begin = 0 ) : xmlNode(){
            name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo;
        }
        // build from an already-parsed node, splitting its id attribute
        headWeight( xmlNode& node ) : xmlNode( node ){
            parser( false );
            name = "weight";
            for (auto tag : tags ){
                if( tag->getId() == "id" ){
                    // NOTE(review): substr(0, find_last_of("_") - 1) drops the
                    // character before the underscore as well -- looks like an
                    // off-by-one; confirm the intended id format upstream
                    idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 );
// (continuation of the headWeight( xmlNode& ) constructor)
// NOTE(review): template arguments and "<weight ...>"-style string literals in
// this span were destroyed by angle-bracket stripping during extraction; they
// are reconstructed from the surviving fragments -- confirm against upstream
// REX.hpp before relying on any reconstructed literal.
id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) );
                }
            }
        }
        // as above, from a raw xmlNode pointer
        headWeight( xmlNode* node ) : xmlNode( *node ){
            parser( false );
            name = "weight";
            for (auto tag : tags ){
                if( tag->getId() == "id" ){
                    idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 );
                    id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) );
                }
            }
        }
        // as above, from a shared_ptr
        headWeight( std::shared_ptr<xmlNode> node ) : xmlNode( *node ){
            parser( false );
            name = "weight";
            for (auto tag : tags ){
                if( tag->getId() == "id" ){
                    idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 );
                    id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) );
                }
            }
        }
        // direct constructors from parameter text plus explicit id pieces
        headWeight( std::string_view paramSet, std::string& idText, unsigned int idNo, const size_t& begin = 0 ) : xmlNode(){
            name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo;
        }
        headWeight( std::string_view paramSet, std::string& idText){
            name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText;
        }
        void setId( std::string identity ){ modded = true; idTag = identity; }
    protected:
        std::string idTag;           // textual part of the weight id
        long unsigned int id = npos; // numeric part of the weight id; npos = unset
        // serialise the opening <weight ...> tag
        void headWriter() override{
            if( tags.size() == 0 ){
                if( idTag == "" ){ nodeHeader = "<weight>"; return; }
                if( id == npos ){ nodeHeader = "<weight id=\"" + idTag + "\">"; return; }
                // NOTE(review): in the surviving text this assignment is NOT
                // followed by a return, so it is overwritten by the generic
                // path below -- possibly a missing return; kept as found
                nodeHeader = "<weight id=\"" + idTag + "_" + std::to_string(id) + "\">";
            }
            nodeHeader = "<weight";
            for( auto tag : tags ){
                nodeHeader += " " + std::string(tag->getId()) + "=\"" + std::string(tag->getVal()) + "\"";
            }
            nodeHeader += ">";
        }
        // as headWriter(), but optionally folding the stored id back into the
        // written tag (skipping any raw "id" attribute to avoid duplication)
        void headWriter( bool incId ){
            if( !incId ){ headWriter(); return; }
            if( idTag == "" ){ headWriter(); return; }
            if( id == npos ){ nodeHeader = "<weight id=\"" + idTag + "\""; }
            else { nodeHeader = "<weight id=\"" + idTag + "_" + std::to_string(id) + "\""; }
            for( auto tag : tags ){
                if( tag->getId() == "id" ){ continue; }
                nodeHeader += " " + std::string(tag->getId()) + "=\"" + std::string(tag->getVal()) + "\"";
            }
            nodeHeader += ">";
        }
        void endWriter() override{
            nodeEnd = "</weight>\n";
        }
        void contWriter() override{
            nodeContent = std::string( content );
        }
        // serialise non-weight children only (weights are handled elsewhere)
        void childWriter() override{
            for( auto child : children){
                if( child->getName() == "weight" ){ continue; }
                nodeContent += *(child->nodeWriter());
            }
        }
        void childWriter( bool hasChildren ){
            if( hasChildren ){ childWriter(); }
        }
        void fullWriter() override{
            if( isModded() || !isWritten() ){
                headWriter();
                contWriter();
                childWriter();
                endWriter();
                writtenSelf = std::make_shared<std::string>( nodeHeader + nodeContent + nodeEnd );
                // NOTE(review): duplicated assignment in the original --
                // harmless but redundant; kept as found
                writtenSelf = std::make_shared<std::string>( nodeHeader + nodeContent + nodeEnd );
                written = true;
                modded = false;
            }
        }
        // variant allowing the id to be folded in; hasChildren currently unused
        void fullWriter( bool incId, bool hasChildren=true ){
            if( isModded() || !isWritten() ){
                headWriter( incId );
                contWriter();
                childWriter( );
                endWriter();
                writtenSelf = std::make_shared<std::string>( nodeHeader + nodeContent + nodeEnd );
                modded = false;
                written = true;
            }
        }
    };

    // ZW: struct for handling rwgt groups
    // in the LHE header initrwgt node
    // A <weightgroup> node: an ordered set of headWeight entries plus the
    // group-level attributes (name, weight_name_strategy).
    struct weightGroup : xmlNode {
    public:
        bool getIncId(){ return includeId; }
        void setIncId( bool nuIncId ){ includeId = nuIncId; }
        std::vector<std::shared_ptr<headWeight>> getWgts(){ return paramSets; }
        // append a weight; any tagged weight turns on id inclusion
        void addWgt( headWeight nuWgt ){ modded = true; paramSets.push_back( std::make_shared<headWeight>( nuWgt ) ); if( nuWgt.hasTag() ){ includeId = true; } }
        void addWgt( std::shared_ptr<headWeight> nuWgt ){ modded = true; paramSets.push_back( nuWgt); if( nuWgt->hasTag() ){ includeId = true; }}
        weightGroup() : xmlNode(){ name = "weightgroup"; return; }
        weightGroup( std::vector<std::shared_ptr<headWeight>> nuWgts ) : xmlNode(){ name = "weightgroup"; paramSets = nuWgts; for( auto wgt : nuWgts ){ if( wgt->hasTag() ){ includeId = true; } } }
        // NOTE(review): element type of this vector lost to extraction;
        // reconstructed as string_view from the headWeight ctor used below
        weightGroup( std::vector<std::string_view> nuWgts ) : xmlNode(){
            name = "weightgroup";
            for( auto wgt : nuWgts ){
                paramSets.push_back( std::make_shared<headWeight>( wgt ) );
            }
            for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } }
        }
        // build from an already-parsed node: every "weight" child becomes a
        // headWeight in paramSets
        weightGroup( xmlNode& wgtNode ) : xmlNode( wgtNode ){
            parser( true );
            name = "weightgroup";
            paramSets.reserve( children.size() );
            for( auto child : children ){
                if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared<headWeight>( *child )
); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + : xmlNode( originFile, begin, childs ){ + name = "weightgroup"; + if( parseTop() ){ + int checker = 0; + for( auto tag : tags ){ + if( tag->getId() == "name" ){ ++checker; rwgtName = tag->getVal(); } + if( tag->getId() == "weight_name_strategy" ){ ++checker; wgtNamStrat = tag->getVal(); + if(wgtNamStrat == "includeIdInWeightName"){ includeId = true; } } + if( checker == 2 ){ break; } + } + } + } + protected: + std::string_view rwgtName; + std::string_view wgtNamStrat; + bool includeId = false; + std::vector> paramSets; + bool nu; + std::string_view idTag; + int id; + void headWriter() override{ + nodeHeader = "nodeWriter()); + } + } + void childWriter() override{ + for(auto child : children){ + nodeContent += (*child->nodeWriter()); + } + } + void childWriter( bool hasChildren ){ + if( hasChildren ){ childWriter(); } + return; + } + void endWriter() override{ nodeEnd = "
\n"; } + }; + + struct initRwgt : xmlNode { + public: + std::vector> getGroups(){ return groups; } + size_t noGrps(){ return groups.size(); } + void addGroup( weightGroup nuGroup ){ + modded = true; + auto nuGrpPtr = std::make_shared( nuGroup ); + if( grpInit( nuGrpPtr ) ){ groups.push_back( std::make_shared( nuGroup ) ); } + } + void addGroup( std::shared_ptr nuGroup ){ + modded = true; + if( grpInit( nuGroup ) ){ groups.push_back( nuGroup ); } + } + void addWgt( unsigned int index, std::shared_ptr nuWgt ){ + if( index < groups.size() ){ modded = true; groups[index]->addWgt( nuWgt ); } + else throw std::range_error( "Appending weight to uninitialised weightgroup." ); + } + void addWgt( unsigned int index, headWeight nuWgt ){ + if( index < groups.size() ){ modded = true; groups[index]->addWgt( nuWgt ); } + else throw std::range_error( "Appending weight to uninitialised weightgroup." ); + } + initRwgt() : xmlNode(){ name = "initrwgt"; return; } + initRwgt( std::vector> nuGroups ) : xmlNode(){ + name = "initrwgt"; + for( auto group : nuGroups ){ + groups.push_back( std::make_shared( *group ) ); + } + } + initRwgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ + parser( true ); + name = "initrwgt"; + groups.reserve( children.size() ); + for( auto child : children ){ + groups.push_back( std::make_shared( *child ) ); + } + } + initRwgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + name = "initrwgt"; + groups.reserve( children.size() ); + for( auto child : children ){ + groups.push_back( std::make_shared( *child ) ); + } + } + protected: + bool grpIsInit = false; + bool grpInit( std::shared_ptr& wgt ){ + if( grpIsInit ){ return true; } + else{ + groups = std::vector>( 1, wgt ); + grpIsInit = true; + return false; + } + } + std::vector> groups; + void contWriter() override{ + nodeContent = "\n"; + for( auto group : groups ){ + nodeContent += (*group->nodeWriter()); + } + } + void childWriter() override{ + for( auto child : children ){ + if( 
child->getName() == "weightgroup" ){ continue; } + nodeContent += (*child->nodeWriter()); + } + } + void childWriter( bool hasChildren ){ + if( hasChildren ){ childWriter(); } + return; + } + }; + + // ZW: struct for handling event + // in event blocks of LHE files + struct bodyWgt : xmlNode { + public: + void setComment( std::string_view nuComment ){ modded = true; comment = nuComment; } + void setVal( std::string nuVal ){ modded = true; valS = nuVal; valD = std::stod(valS);} + void setVal( std::string_view nuVal ){ modded = true; valS = std::string(nuVal); valD = std::stod(valS);} + void setVal( double nuVal ){ modded = true; valD = nuVal; valS = std::to_string(valD);} + void setId( std::string nuId ){ + modded = true; id = nuId; + for( auto tag : tags ){ + if( tag->getId() == "id" ){ tag->setVal( id ); return; } + } + addTag( std::make_shared( "id", id ) ); + } + void setModded( bool nuModded ){ modded = nuModded; } + std::string_view getComment(){ return comment; } + std::string_view getValS(){ return valS; } + double getValD(){ return valD; } + bodyWgt() : xmlNode(){ return; } + bodyWgt( std::string_view value ) : xmlNode() { setVal( value ); modded = false; } + bodyWgt( double value ) : xmlNode() { setVal( value ); modded = false; } + bodyWgt( std::string_view value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } + bodyWgt( double value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } + bodyWgt( std::string_view value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } + bodyWgt( double value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } + bodyWgt( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + : xmlNode( originFile, begin, childs ){ + auto strtPt = originFile.find_first_not_of(" >+", originFile.find(">", begin)+1); + 
valS = originFile.substr( strtPt, originFile.find(" ", strtPt) - strtPt ); + valD = std::stod( valS ); + } + bodyWgt( double value, std::string& idTag ){ + setVal( value ); + id = idTag; + addTag( std::make_shared("id",id) ); + } + void appendWgt( std::shared_ptr document ){ + if( !isWritten() ){ fullWriter(); } + *document += *writtenSelf; + } + void appendWgt( std::string* document ){ + if( !isWritten() ){ fullWriter(); } + *document += *writtenSelf; + } + std::shared_ptr appendWgt( std::string_view document ){ + if(!isWritten() ){ fullWriter(); } + auto retDoc = std::make_shared( document ); + *retDoc += *writtenSelf; + return retDoc; + } + protected: + std::string_view comment; + std::string valS; + std::string id; + double valD; + void fullWriter() override { + writtenSelf = std::make_shared( "getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + *writtenSelf += ">" + std::string(valS) + "\n"; + modded = false; + written = true; + } + }; + + // ZW: fcn for finding the next block in SLHA format + // parameter cards + size_t blockFinder( std::string_view parseFile, size_t startPt = 0 ){ + if( parseFile.size() > 5 ){ if( clStringComp(parseFile.substr(0,5), std::string("block") )){ return size_t(0); } } + return clStringFind( parseFile, std::string("\nblock"), startPt ); + } + + // ZW: fcn for finding each decay line in SLHA format + // parameter card + std::vector decBlockStractor( std::string_view parseFile ){ + auto allDs = nuFindEach( parseFile, "\nd" ); + std::vector decLines; + decLines.reserve( allDs->size() ); + for( auto pos : *allDs ) + { + if( !(clStringComp(parseFile.substr( pos+1, 5 ), std::string("decay"))) ){ continue; } + decLines.push_back( parseFile.substr( pos + 1, parseFile.find( "\n", pos + 1 ) - pos - 1 ) ); + } + return decLines; + } + + // ZW: fcn for extracting the relevant lines of + // a block in SLHA format parameter card + // removes any comments between start of this block and next + // and also ignores lines with other 
information, + // eg DECAY lines + std::vector blockLineStractor( std::string_view parseFile, size_t startPt = 0){ + auto blockStrt = blockFinder( parseFile, startPt ); + auto newBlock = blockFinder( parseFile, blockStrt + 1 ); + std::vector paramLines; + paramLines.reserve( nuStrCount( parseFile, "\n" ) ); + std::shared_ptr> parLines; + if( newBlock == npos ){ parLines = nuLineSplitter( parseFile.substr( blockStrt ) ); } + else{ parLines = nuLineSplitter( parseFile.substr( blockStrt, newBlock - blockStrt ) ); } + for( auto line : *parLines ) + { + if( line.size() == 0 ){ continue; } + if( line[0] != ' ' ){ continue; } + paramLines.push_back( line ); + } + return paramLines; + } + + // ZW: struct for handling the first line of + // LHE format event block + struct evHead { + public: + std::string_view getComment(){ return comment; } + std::string_view getWeight(){ return weight; } + std::string_view getScale(){ return scale; } + std::string_view getAQED(){ return aqed; } + std::string_view getAQCD(){ return aqcd; } + std::string_view getNprt(){ return nprt; } + std::string_view getProcID(){ return procid; } + bool isModded(){ return modded; } + bool isWritten(){ return written; } + void setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } + void setWeight( std::string_view nuWgt ){ modded = true; weight = nuWgt; } + void setScale( std::string_view nuScale ){ modded = true; scale = nuScale; } + void setAQED( std::string_view nuAQED ){ modded = true; aqed = nuAQED; } + void setAQCD( std::string_view nuAQCD ){ modded = true; aqcd = nuAQCD; } + void setNprt( std::string_view nuNprt ){ modded = true; nprt = nuNprt; } + void setProcID( std::string_view nuProcID ){ modded = true; procid = nuProcID; } + std::shared_ptr getContent(){ + if( !isWritten() || isModded() ){ writer(); } + return content; + } + evHead(){ return; } + evHead( const std::string_view originFile, size_t beginLine = 0, size_t endLine = npos ) + { + if( originFile.size() == 0){ return; 
} + beginLine = originFile.find_first_not_of("\n ", beginLine); + if( endLine == npos ){ endLine = originFile.find("\n", beginLine ) + 1; } + sourceFile = originFile.substr( beginLine, endLine - beginLine ); + auto evLine = nuWordSplitter( sourceFile ); + nprt = evLine->at(0) ; + procid = evLine->at(1); + weight = evLine->at(2); + scale = evLine->at(3); + aqed = evLine->at(4); + aqcd = evLine->at(5); + } + protected: + std::shared_ptr content; + std::string_view sourceFile; + std::string_view comment; + std::string_view weight; + std::string_view scale; + std::string_view aqed; + std::string_view aqcd; + std::string_view nprt; + std::string_view procid; + bool modded = false; + bool written = false; + void writer(){ + if( isWritten() && !isModded() ){ return; } + if( !isModded() ){ content = std::make_shared( sourceFile ); return; } + auto retText = std::make_shared( " " ); + *content = " " + std::string( nprt ); + for( int k = 0 ; k < 8 - procid.length() ; ++k ){ *content += " "; } + *content += std::string( procid ) + " " + std::string( weight ) + " " + std::string( scale ) + " " + std::string( aqed ) + " " + std::string( aqcd ); + if( comment != "" ){ *content += " # " + std::string( comment ); } + *content += "\n"; + modded = false; + written = true; + } + }; + + // ZW: struct for handling particle lines + // in LHE format event block + struct lhePrt{ + public: + std::string_view getLine(){ return sourceFile; } + std::string_view getComment(){ return comment; } + std::vector getMom(){ return std::vector( std::begin( mom ), std::end( mom ) ); } + std::string_view getE(){ return energy; } + std::string_view getMass(){ return mass; } + std::string_view getVTim(){ return vtim; } + std::string_view getSpin(){ return spin; } + std::string_view getPDG(){ return pdg; } + std::string_view getStatus(){ return status; } + std::vector getMothers(){ return std::vector( std::begin( mothers ), std::end( mothers ) ); } + std::vector getColor(){ return std::vector( std::begin( 
icol ), std::end( icol ) ); } + void setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } + void setMom( std::vector nuMom ){ modded = true; mom[0] = nuMom[0]; mom[1] = nuMom[1]; mom[2] = nuMom[2]; } + void setEnergy( std::string_view nuE ){ modded = true; energy = nuE; } + void setMass( std::string_view nuM ){ modded = true; mass = nuM; } + void setVTim( std::string_view nuVTim ){ modded = true; vtim = nuVTim; } + void setSpin( std::string_view nuSpin ){ modded = true; spin = nuSpin; } + void setPDG( std::string_view nuPDG ){ modded = true; pdg = nuPDG; } + void setStatus( std::string_view nuSt ){ modded = true; status = nuSt; } + void setMothers( std::vector nuMum ){ modded = true; mothers[0] = nuMum[0]; mothers[1] = nuMum[1]; } + void setColors( std::vector nuCol ){ modded = true; icol[0] = nuCol[0]; icol[1] = nuCol[1]; } + bool isModded(){ return modded; } + bool isWritten(){ return written; } + std::shared_ptr getContent(){ + if( !isWritten() || isModded() ){ writer(); } + return content; + } + lhePrt(){ return; } + lhePrt( const std::string_view originFile, const size_t& beginLine = 0, const size_t& endLine = npos ) + { + sourceFile = originFile.substr( beginLine, endLine - beginLine ); + auto evLine = nuWordSplitter( sourceFile ); + pdg = evLine->at(0); + status = evLine->at(1); + mothers[0] = evLine->at(2); mothers[1] = evLine->at(3); + icol[0] = evLine->at(4); icol[1] = evLine->at(5); + for( int k = 6 ; k < 9 ; ++k){ + mom[k-6] = evLine->at(k); + } + energy = evLine->at(9); + mass = evLine->at(10); + vtim = evLine->at(11); + spin = evLine->at(12); + if( evLine->size() > 13 ){ comment = sourceFile.substr( sourceFile.find( "#" ) ); } + } + protected: + std::shared_ptr content; + std::string_view sourceFile; + std::string_view comment; + std::string_view mom[3]; + std::string_view energy; + std::string_view mass; + std::string_view vtim; + std::string_view spin; + std::string_view pdg; + std::string_view status; + std::string_view 
mothers[2]; + std::string_view icol[2]; + bool modded = false; + bool written = false; + void writer(){ + if( isWritten() && !isModded() ){ return; } + if( !isModded() ){ content = std::make_shared( sourceFile ); return; } + *content = ""; + for( int k = 0; k < 10 - pdg.length() ; ++k ){ *content += " "; } + *content += std::string(pdg) + " " + std::string(status); + for( auto mum : mothers ){ *content += " " + std::string( mum ); } + for( auto col : icol ){ *content += " " + std::string( col ); } + for( auto pval : mom ){ *content += " " + std::string(pval); } + *content += " " + std::string( energy ) + " " + std::string( mass ) + " " + std::string( vtim ) + " " + std::string( spin ); + if( comment != "" ){ *content += " # " + std::string( comment ); } + *content += "\n"; + modded = false; + written = true; + } + }; + + // ZW: struct for handling LHE format event block + struct event : xmlNode { + public: + evHead getHead(){ return header; } + std::vector> getPrts(){ return prts; } + std::vector> getWgts(){ return rwgt; } + void setHead( evHead head ){ modded = true; header = head; } + void addPrt( std::shared_ptr prtcl ){ modded = true; prts.push_back( prtcl ); } + void addPrt( lhePrt prtcl ){ modded = true; prts.push_back( std::make_shared(prtcl) ); } + void setPrts( std::vector> prtcls ){ modded = true; prts = prtcls; } + void addWgt( bodyWgt nuWgt ){ addedWgt = true; rwgt.push_back( std::make_shared(nuWgt) ); } + void addWgt( std::shared_ptr nuWgt ){ modded = true; rwgt.push_back( nuWgt ); } + void addWgt( bodyWgt nuWgt, std::string& id ){ addedWgt = true; nuWgt.setId( id ); rwgt.push_back( std::make_shared(nuWgt) ); } + void addWgt( std::shared_ptr nuWgt, std::string& id ){ modded = true; nuWgt->setId( id ); rwgt.push_back( nuWgt ); } + bool newWeight(){ return addedWgt; } + int getNprt(){ return prts.size(); } + bool isModded() override{ return modded; } + bool isModded( bool deep ) override { + if( !deep ){ return modded; } + bool modStat = modded; + for( 
auto child : children ){ if(modStat){ return modStat; }; modStat = (modStat || child->isModded( deep )); } + modStat = (modStat || header.isModded()); + for( auto prt : prts ){ if(modStat){ return modStat; }; modStat = (modStat || prt->isModded()); } + for( auto wgt : rwgt ){ if(modStat){ return modStat; }; modStat = (modStat || wgt->isModded()); } + return modStat; + } + event(){ return; } + event( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + : xmlNode(originFile, begin, childs) { + xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); + if( trueStart == npos ){ return; } + auto vals = lineFinder( originFile.substr( trueStart, originFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(originFile, vals->at(0) + trueStart, vals->at(1) + trueStart + 1 ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(originFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart + 1) ); + } + } + event( const xmlNode& originFile ) + : xmlNode( originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" ", start+1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + bool prtsAreMod(){ + for( auto prt : prts ){ if( prt->isModded() ){ return true; } } + return false; + } + bool headIsMod(){ + return header.isModded(); + } + protected: + std::vector> rwgt; + std::shared_ptr childRwgt; + bool hasRwgt(){ + if( rwgt.size() > 0 ){ return true; } + return false; + } + bool rwgtChild(){ + if( childRwgt != nullptr 
){ return true; } + for( auto child : children ){ if( clStringComp(child->getName(), std::string("rwgt") ) ){ childRwgt = child; return true; } } + return false; + } + bool bothRwgt(){ return (hasRwgt() && rwgtChild() ); } + bool eitherRwgt(){ return (hasRwgt() || rwgtChild() ); } + evHead header; + std::vector> prts; + bool inRwgtChild( std::string_view name ){ + for( auto child : childRwgt->getChildren() ){ + for( auto tag : child->getTags() ){ if(clStringComp(tag->getVal(), name)){ return true; } } + } + return false; + } + bool checkRwgtOverlap(){ + for( auto wgt : rwgt ){ + for( auto tag : wgt->getTags() ){ if( inRwgtChild( tag->getVal() ) ){ return true; } } + } + return false; + } + void childRwgtWriter(){ + if( rwgtChild() ){ nodeContent += *childRwgt->nodeWriter(); } + } + void vecRwgtWriter( bool midNode = false ){ + if( !midNode ){ nodeContent += "\n"; } + for( auto wgt : rwgt ){ + nodeContent += *wgt->nodeWriter(); + } + nodeContent += "\n"; + } + void rwgtWriter(){ + if( bothRwgt() ){ if( checkRwgtOverlap() ){ childRwgtWriter(); return; } + childRwgtWriter(); + nodeContent.erase( nodeContent.size() - 8, 8 ); + vecRwgtWriter(); + return; + } else { + if( hasRwgt() ){ vecRwgtWriter(); return; } + if( rwgtChild() ){ childRwgtWriter(); return; } + } + } + void contWriter() override { + nodeContent = "\n" + *header.getContent(); + for( auto prt : prts ){ + nodeContent += *prt->getContent(); + } + } + void childWriter() override { + for( auto child : children ){ + if( clStringComp( child->getName(), std::string("wgt") ) ){ continue; } + nodeContent += *child->nodeWriter(); + } + } + bool addedWgt = false; + void fullWriter() override { + if( isModded( false ) ){ + headWriter(); + contWriter(); + childWriter(); + rwgtWriter(); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + modded = false; + } else if( !isWritten() ){ + writtenSelf = std::make_shared( xmlFile.substr( start, end - start ) ); + written = true; + } + } + 
void fullWriter( bool deep ){ + if( !deep ){ fullWriter(); return; } + if( isModded( true ) ){ + headWriter(); + contWriter(); + childWriter(); + rwgtWriter(); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + modded = false; + written = true; + } else if( !isWritten() ){ + writtenSelf = std::make_shared( xmlFile.substr( start, end - start ) ); + written = true; + } + } + void appendWgts(){ + if( !addedWgt ){ return; } + writtenSelf->erase( writtenSelf->size() - 17, 17 ); + for( auto wgt : rwgt ){ + if( !wgt->isWritten() ){ wgt->appendWgt( writtenSelf ); } + } + *writtenSelf += "\n
\n"; + } + public: + std::shared_ptr nodeWriter() override { + if( isModded(false) || !isWritten() ){ fullWriter(); return writtenSelf; } + if( addedWgt ){ appendWgts(); } + return writtenSelf; + } + std::shared_ptr nodeWriter( bool recursive ){ + if( isModded( recursive ) || !isWritten() ){ fullWriter(); return writtenSelf; } + if( addedWgt ){ appendWgts(); } + return writtenSelf; + } + }; + + // ZW: struct for handling the first line of + // LHE format init tag + struct lheInitHead{ + public: + std::string_view idbmup[2]; + std::string_view ebmup[2]; + std::string_view pdfgup[2]; + std::string_view pdfsup[2]; + std::string_view idwtup; + std::string_view nprup; + bool isWritten(){ return written; } + bool isModded(){ return modded; } + std::shared_ptr getContent(){ + if( isModded() || !isWritten() ){ writer(); } + return content; } + lheInitHead( std::string_view initHead ){ + auto vals = *nuBlankSplitter( initHead ); + if( vals.size() < 10 ){ return; } + idbmup[0] = vals[0]; idbmup[1] = vals[1]; + ebmup[0] = vals[2]; ebmup[1] = vals[3]; + pdfgup[0] = vals[4]; pdfgup[1] = vals[5]; + pdfsup[0] = vals[6]; pdfsup[1] = vals[7]; + idwtup = vals[8]; nprup = vals[9]; + } + lheInitHead( xmlNode& initNode ) + { + if( initNode.getName() != "init" ){ return; } + auto startPos = initNode.getFile().find( ">", initNode.getStart() ) + 1; + auto endPos = initNode.getFile().find( "\n", startPos ); + auto vals = *nuBlankSplitter( initNode.getFile().substr( startPos, endPos - startPos ) ); + idbmup[0] = vals[0]; idbmup[1] = vals[1]; + ebmup[0] = vals[2]; ebmup[1] = vals[3]; + pdfgup[0] = vals[4]; pdfgup[1] = vals[5]; + pdfsup[0] = vals[6]; pdfsup[1] = vals[7]; + idwtup = vals[8]; nprup = vals[9]; + } + protected: + std::shared_ptr content; + bool written = false; + bool modded = false; + void writer(){ + *content = std::string(idbmup[0]) + " " + std::string(idbmup[1]) + " " + std::string(ebmup[0]) + " " + std::string(ebmup[1]) + " " + std::string(pdfgup[0]) + + " " + 
std::string(pdfgup[1]) + " " + std::string(pdfsup[0]) + " " + std::string(pdfsup[1]) + " " + std::string(idwtup) + " " + std::string(nprup) +"\n"; + written = true; + modded = false; + } + }; + + // ZW: struct for handling process lines + // in LHE format init tag + struct lheInitLine { + public: + std::string_view xsecup; + std::string_view xerrup; + std::string_view xmaxup; + std::string_view lprup; + bool isWritten(){ return written; } + bool isModded(){ return modded; } + std::shared_ptr getContent(){ + if( isModded() || !isWritten() ){ writer(); } + return content; } + lheInitLine(){} + lheInitLine( std::string_view procLine ) + { + auto vals = *nuBlankSplitter( procLine ); + if( vals.size() < 4 ){ return; } + xsecup = vals[0]; + xerrup = vals[1]; + xmaxup = vals[2]; + lprup = vals[3]; + } + protected: + std::shared_ptr content; + bool written = false; + bool modded = false; + void writer(){ + *content = std::string(xsecup) + " " + std::string(xerrup) + " " + std::string(xmaxup) + " " + std::string(lprup) + "\n"; + written = true; + modded = false; + } + }; + + // ZW: struct for handling single parameter line in + // SLHA format parameter card + struct paramVal{ + public: + double value = 0; + int id = 0; + std::string_view realLine; + std::string_view comment; + std::string_view idStr; + std::string_view valStr; + virtual void parse(){ + id = std::stoi( std::string(idStr) ); + value = std::stod( std::string(valStr) ); + } + paramVal(){ realLine = ""; idStr = ""; valStr = ""; } + paramVal( std::string_view paramLine, bool parseOnline = false ) + { + if( paramLine.find("\n") != npos ){ + auto startPos = paramLine.find_first_not_of(" \n", paramLine.find("\n")); + if( startPos!= npos ){ + auto endPos = paramLine.find("\n", startPos); + realLine = paramLine.substr(startPos, endPos - startPos - 1); + } else{ + realLine = paramLine.substr( 0, paramLine.find("\n") - 1 ); + } + } + realLine = paramLine; + auto vals = *nuBlankSplitter( realLine ); + idStr = vals[0]; + 
valStr = vals[1]; + if( parseOnline ){ + if( vals.size() > 2 ) + { + auto comStart = realLine.find("#"); + comStart = realLine.find_first_not_of( " #", comStart ); + comment = realLine.substr( comStart, realLine.find("\n", comStart) - comStart ); + } + parse(); } + } + bool isMod(){ return modded; } + bool modded = false; + virtual std::shared_ptr selfWrite(){ + auto writeVal = std::make_shared(""); + if( isMod() ) + { + for( int k = idStr.size() ; k < 5 ; ++k ){ *writeVal += " "; } + *writeVal += std::string( idStr ) + " " + std::string( valStr ); + if( comment.size() != 0 ){ + *writeVal += " # " + std::string( comment ); + } + *writeVal += "\n"; + } + else{ *writeVal = std::string( realLine ) + "\n"; } + return writeVal; + } + }; + + // ZW: struct for handling single DECAY line + // in SLHA format parameter card + struct decVal : paramVal{ + public: + void parse() override { + auto vals = *nuBlankSplitter( realLine ); + id = std::stoi( std::string(vals[1]) ); + value = std::stod( std::string(vals[2]) ); + if( vals.size() > 3 ) + { + auto comStart = realLine.find("#"); + comment = realLine.substr( comStart, realLine.find("\n", comStart) - comStart ); + } + } + decVal( std::string_view paramLine = "", bool parseOnline = false ) : paramVal( paramLine, false ) + { + if( parseOnline ){ parse(); } + } + std::shared_ptr selfWrite() override { + auto writeVal = std::make_shared(""); + if( isMod() ) + { + *writeVal += "DECAY " + std::string( idStr ) + " " + std::string( valStr ); + if( comment.size() != 0 ){ + *writeVal += " # " + std::string( comment ); + } + *writeVal += "\n"; + } + else{ *writeVal = std::string( realLine ) + "\n"; } + return writeVal; + } + }; + + // ZW: struct for handling parameter block + // in SLHA format parameter card + struct paramBlock { + public: + std::string_view realBlock; + size_t startPt; + std::string_view comment; + std::string_view initComm; + std::string_view name; + std::vector params; + virtual void parse( bool parseOnline = false 
){ + if( realBlock.size() == 0 ){ return; } + if( !(clStringComp(realBlock.substr(startPt+1, 5), std::string("block"))) ){ startPt = clStringFind( realBlock, std::string("\nblock") ); } + auto namePt = realBlock.find_first_not_of( " ", startPt + 7 ); + name = realBlock.substr( namePt, realBlock.find_first_of( " \n", namePt ) - namePt ); + if( realBlock.find( " ", namePt ) < realBlock.find( "\n", namePt ) ) + {comment = realBlock.substr( namePt + name.size(), realBlock.find( "\n", namePt ) - namePt - name.size() ); } + auto paramLines = blockLineStractor( realBlock.substr( startPt ) ); + params.reserve( paramLines.size() ); + for( auto line : paramLines ) + { + params.push_back( paramVal( line, parseOnline ) ); + } + } + paramBlock(){ return; } + paramBlock( std::string_view paramSet, bool parseOnline = false ) + { + realBlock = paramSet; + startPt = clStringFind( realBlock, std::string("\nB") ); + if( parseOnline ){ parse(parseOnline); } + } + bool isMod(){ return modded; } + bool modded = false; + virtual std::shared_ptr selfWrite(){ + auto writeBlock = std::make_shared(""); + if( isMod() ) + { + *writeBlock += "\nBLOCK " + std::string(name); + if( comment.size() > 0 ){ + *writeBlock += " # " + std::string( comment ); + } + *writeBlock += "\n"; + for ( auto val : params ) + { + *writeBlock += *val.selfWrite(); + } + } + else{ if( startPt == npos ){ + *writeBlock += realBlock; + } else { + *writeBlock = realBlock.substr( startPt ); + } } + return writeBlock; + } + }; + + // ZW: struct for handling DECAY lines + // in SLHA format parameter card + struct decBlock : paramBlock { + public: + std::vector decays; + void parse( bool parseOnline = false ) override{ + if( realBlock.size() == 0 ){ return; } + auto decLines = clFindEach( realBlock, std::string("\ndecay") ); + decays.reserve(decLines->size()); + if( realBlock.size() > 5 ){ if( clStringComp( realBlock.substr(0,5), std::string("decay")) ) + { decays.push_back( decVal(realBlock.substr( 0, realBlock.find("\n") ), 
parseOnline) ); } } + for( auto pts : *decLines ) + { + auto lineBr = realBlock.find( "\n", pts + 1 ); + if( lineBr == npos ){ decays.push_back( decVal( realBlock.substr( pts + 1), parseOnline ) ); continue; } + decays.push_back( decVal( realBlock.substr( pts + 1, lineBr - pts - 1 ), parseOnline ) ); + } + } + void parse( std::shared_ptr> decLines, bool parseOnline = false ) { + decays.reserve(decLines->size()); + if( realBlock.size() > 5 ){ if( clStringComp( realBlock.substr(0,5), std::string("decay")) ) + { decays.push_back( decVal(realBlock.substr( 0, realBlock.find("\n") ), parseOnline) ); } } + for( auto pts : *decLines ) + { + auto lineBr = realBlock.find( "\n", pts + 1 ); + if( lineBr == npos ){ decays.push_back( decVal( realBlock.substr( pts + 1), parseOnline ) ); continue; } + decays.push_back( decVal( realBlock.substr( pts + 1, lineBr - pts - 1 ), parseOnline ) ); + } + } + decBlock( std::string_view paramSet = "", bool parseOnline = false ) : paramBlock( paramSet, parseOnline ) + { + realBlock = paramSet; + if( parseOnline ){ parse(parseOnline); } + } + std::shared_ptr selfWrite() override { + auto writeBlock = std::make_shared(""); + *writeBlock += "\n"; + for ( auto val : decays ) + { + *writeBlock += *val.selfWrite(); + } + return writeBlock; + } + }; + + // ZW: struct for handling SLHA parameter cards + struct lesHouchesCard { + public: + decBlock decays; + std::string_view xmlFile; + size_t start; + size_t end; + bool modded; + bool parsed; + std::string_view header; + std::vector blocks; + size_t blockStart; + std::function lambda = [&]( size_t& conPt, const std::string_view& file ) + { return !( file[conPt+1] == ' ' || file[conPt+1] == '#' || file[conPt+1] == '\n' ); }; + std::function lambdaNu = [&]( size_t& conPt, const std::string_view& file ) + { return !( file[conPt+1] == ' ' || file[conPt+1] == '\n' || file[conPt+1] == '<'); }; + std::function lambdaD = [&]( size_t& conPt, const std::string_view& file ) + { return !( 
clStringComp(file.substr(conPt+1, 1), std::string("d") ) ); }; + void parse( bool parseOnline = false ) + { + if( parsed ){ return; } + if( xmlFile.substr(start,1).find_first_of("BbDd#") == npos ){ start = clStringFindIf( xmlFile, std::string("\n"), lambdaNu ); } + auto blockPts = clFindEach( xmlFile, std::string("\nblock") ); + auto decLines = clFindEach( xmlFile, std::string("\ndecay") ); + header = xmlFile.substr( start, std::min( blockPts->at(0), decLines->at(0) ) - start ); + for( int k = 0 ; k < blockPts->size() - 1 ; ++k ) + { + blocks.push_back( paramBlock( xmlFile.substr( blockPts->at(k), blockPts->at(k+1) - blockPts->at(k) ), parseOnline ) ); + } + blocks.push_back(paramBlock(xmlFile.substr(blockPts->at(blockPts->size()-1), clStringFindIf( xmlFile, std::string("\n"), + lambda, blockPts->at(blockPts->size()-1) + 1) - blockPts->at(blockPts->size()-1)), parseOnline)); + decays = decBlock( xmlFile ); + decays.parse( decLines, parseOnline ); + parsed = true; + } + lesHouchesCard( const std::string_view originFile = "", const size_t& begin = 0, bool parseOnline = false ){ + xmlFile = originFile; start = begin; size_t trueStart = originFile.find_first_not_of("\n ", begin+1); + modded = false; blockStart = clStringFindIf( xmlFile, std::string("\n"), lambda, start + 1); end = xmlFile.find(" selfWrite(){ + auto writeCard = std::make_shared(header); + if( isMod() ) + { for( auto block : blocks ) + { *writeCard += *block.selfWrite(); } + *writeCard += *decays.selfWrite(); } + else{ + if( end != npos ){ *writeCard += std::string( xmlFile.substr( blockStart, end - blockStart ) ); + } else{ *writeCard += std::string( xmlFile.substr( blockStart ) ); } + } + return writeCard; + } + }; + + struct slhaNode : xmlNode { + public: + std::shared_ptr getParameters(){ + modded = true; + return parameterCard; + } + slhaNode() : xmlNode(){} + slhaNode( lesHouchesCard parameters ) : xmlNode(){ + parameterCard = std::make_shared( parameters ); + pCardInit = true; + } + slhaNode( 
std::shared_ptr parameters ) : xmlNode(){ + parameterCard = parameters; + pCardInit = true; + } + slhaNode( xmlNode& node, bool parseOnline = false ) : xmlNode( node ){ + parameterCard = std::make_shared( node.getFile(), node.getStart(), parseOnline ); + } + slhaNode( xmlNode* node, bool parseOnline = false ) : xmlNode( *node ){ + parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); + } + slhaNode( std::shared_ptr node, bool parseOnline = false ) : xmlNode( *node ){ + parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); + } + slhaNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ) + : xmlNode( originFile, begin ){ + if( parse() ){ parameterCard = std::make_shared( content, begin, parseOnline ); pCardInit = true; } + } + protected: + std::shared_ptr parameterCard; + bool pCardInit = false; + void headWriter() override{ + nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + nodeHeader += ">"; + } + void endWriter() override{ nodeEnd += "\n"; } + void contWriter() override{ + if( pCardInit ){ + nodeContent = *parameterCard->selfWrite(); + } else { + nodeContent = content; + } + } + }; + + // ZW: struct for handling LHE init nodes + struct initNode : xmlNode { + public: + std::shared_ptr getHead(){ return initHead; } + std::vector> getLines(){ return initLines; } + void setHead( std::shared_ptr head ){ modded = true; initHead = head; } + void setLines( std::vector> lines ){ modded = true; initLines = lines; initHead->nprup = std::to_string( initLines.size() ); } + void addLine( std::shared_ptr line ){ modded = true; initLines.push_back( line ); initHead->nprup = std::to_string( initLines.size() ); } + initNode() : xmlNode(){ name = "init"; } + initNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ) + : xmlNode( originFile, begin ){ + auto strtPt = originFile.find_first_not_of(" \n", originFile.find(">", 
start+1)); + content = originFile.substr( strtPt, originFile.find(" initHead; + std::vector> initLines; + bool parseContent() override{ + if( content.size() == 0 ){ return false; } + auto linebreaks = lineFinder( content ); + if( linebreaks->size() == 0 ){ return false; } + initHead = std::make_shared(content.substr( 0, linebreaks->at(0) ) ); + for( int k = 0 ; k < linebreaks->size() - 1 ; ++k ){ + initLines.push_back( std::make_shared( content.substr( linebreaks->at(k), linebreaks->at(k+1) - linebreaks->at(k) ) ) ); + } + return true; + } + void contWriter() override{ + if( isModded() ){nodeContent = std::string( content ); return; } + nodeContent = *initHead->getContent(); + for( auto line : initLines ){ + nodeContent += *line->getContent(); + } + } + }; + + // ZW: struct for explicitly handling LHE header nodes + struct lheHead : xmlNode { + public: + size_t addWgtGroup( std::shared_ptr& wgtGroup ){ + hasRwgt = true; + modded = true; + if( wgtGrpInit( wgtGroup ) ){ + rwgtNodes->addGroup( wgtGroup ); + } + return (rwgtNodes->noGrps() - 1); + } + size_t addWgtGroup( weightGroup wgtGroup ){ + hasRwgt = true; + modded = true; + auto wgtGrpPtr = std::make_shared( wgtGroup ); + if( wgtGrpInit( wgtGrpPtr ) ){ + rwgtNodes->addGroup( std::make_shared( wgtGroup ) ); + } + return (rwgtNodes->noGrps() - 1); + } + void addWgt( unsigned int index, std::shared_ptr nuWgt ){ + if( index >= (int)rwgtNodes->getGroups().size() ) + throw std::range_error( "Appending weight to uninitialised weightgroup." ); + hasRwgt = true; + modded = true; + rwgtNodes->addWgt( index, nuWgt ); + } + void addWgt( unsigned int index, headWeight nuWgt ){ + if( index >= (int)rwgtNodes->getGroups().size() ) + throw std::range_error( "Appending weight to uninitialised weightgroup." 
); + hasRwgt = true; + modded = true; + rwgtNodes->addWgt( index, nuWgt ); + } + void addWgt( unsigned int index, std::shared_ptr nuWgt, std::string idTagg ){ + if( index >= (int)rwgtNodes->getGroups().size() ) + throw std::range_error( "Appending weight to uninitialised weightgroup." ); + hasRwgt = true; + modded = true; + nuWgt->setId( idTagg ); + rwgtNodes->addWgt( index, nuWgt ); + } + void addWgt( unsigned int index, headWeight nuWgt, std::string idTagg ){ + if( index >= (int)rwgtNodes->getGroups().size() ) + throw std::range_error( "Appending weight to uninitialised weightgroup." ); + hasRwgt = true; + modded = true; + nuWgt.setId( idTagg ); + rwgtNodes->addWgt( index, nuWgt ); + } + void setInitRwgt( initRwgt initWgt ){ hasRwgt = true; modded = true; rwgtNodes = std::make_shared(initWgt); } + void setInitRwgt( std::shared_ptr initWgt ){ hasRwgt = true; modded = true; rwgtNodes = initWgt; } + std::vector> getWgtGroups(){ return rwgtNodes->getGroups(); } + std::shared_ptr getInitRwgt(){ return rwgtNodes; } + std::shared_ptr getParameters(){ return parameters; } + void setParameters( std::shared_ptr params ){ parameters = params; } + bool rwgtInc(){ return hasRwgt; } + lheHead(){ return; } + lheHead( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + : xmlNode(originFile, begin, childs){ + xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); + if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} + } + protected: + bool wgtGrpIsInit = false; + bool wgtGrpInit( std::shared_ptr& wgtGrp ){ + if( wgtGrpIsInit ){ return true; } + if( rwgtNodes == nullptr ){ + rwgtNodes = std::make_shared(); + wgtGrpIsInit = true; + rwgtNodes->addGroup( wgtGrp ); + return false; + } else throw std::runtime_error( "Error while initiating return LHE file header (initrwgt node is defined in an unrecognised 
manner)." ); + } + std::shared_ptr parameters; + bool hasRwgt = false; + std::shared_ptr rwgtNodes; + std::vector> initrwgt; + bool relChildSet = false; + std::vector relChild; + void setRelChild(){ + if( relChildSet ){ return; } + relChild.reserve( children.size() ); + for( int k = 0 ; k < children.size() ; ++k ){ + auto child = &children[k]; + if( (*child)->getName() == "slha" ){ continue; } + if( (*child)->getName() == "initrwgt" ){ continue; } + relChild.push_back( k ); + } + relChildSet = true; + } + bool parseChildren( bool recursive ){ + bool status = true; + for( auto child : children ){ + if( child->getName() == "slha" || child->getName() == "initrwgt" ){ continue; } + child->parser( recursive ); + status = (status && child->isParsed() ); + deepParsed = true; + } + return status; + } + void headWriter() override{ + nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + nodeHeader += ">\n"; + } + void childWriter() override{ + setRelChild(); + for( auto relKid : relChild ){ + nodeContent += *(children[relKid]->nodeWriter()); + } + if( parameters != nullptr ){ nodeContent += *parameters->nodeWriter(); } + if( hasRwgt ){ + nodeContent += *rwgtNodes->nodeWriter(); + } + } + void fullWriter() override{ + if( isModded() ){ + headWriter(); + contWriter(); + childWriter(); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + written = true; + } + } + }; + + // ZW: struct for keeping track of appended weights in LHE node, + // since weight information is stored both in the header + // and in the individual events + struct newWgt{ + protected: + std::shared_ptr headWgt; + std::vector> bodyWgts; + public: + newWgt( std::shared_ptr heaWgt, std::vector> bodWgts ){ + headWgt = heaWgt; bodyWgts = bodWgts; + } + newWgt( std::shared_ptr heaWgt, std::shared_ptr> wgts ){ + headWgt = heaWgt; + bodyWgts = std::vector>(wgts->size()); + auto idTag = std::string(headWgt->getTag()); + if( idTag != "" ){ + for( size_t i = 0 ; i 
< wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i), idTag); + } + } else{ + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i)); + } + } + } + newWgt( std::string_view parameters, std::shared_ptr> wgts, std::string idTag = "rex_rwgt" ){ + headWgt = std::make_shared(parameters, idTag); + bodyWgts = std::vector>(wgts->size()); + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i), idTag); + } + } + newWgt( std::string_view parameters, int idNum, std::shared_ptr> wgts, std::string idTag = "rex_rwgt" ){ + std::string newTag = std::string( idTag ) + "_" + std::to_string( idNum ); + headWgt = std::make_shared(parameters, newTag); + bodyWgts = std::vector>(wgts->size()); + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i), newTag); + } + } + newWgt( std::string& parameters ){ + headWgt = std::make_shared(parameters); + } + newWgt( std::string& parameters, std::string& idTag ){ + headWgt = std::make_shared(parameters, idTag); + } + std::shared_ptr getHeadWgt(){ return headWgt; } + std::vector> getBodyWgts(){ return bodyWgts; } + void addBdyWgts( std::shared_ptr> wgts ){ + auto idTag = std::string(headWgt->getTag()); + if( idTag != "" ){ + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i), idTag); + } + } else{ + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i)); + } + } + } + }; + + // ZW: general struct for handling LHE files explicitly + struct lheNode : xmlNode { + public: + std::vector> events = {}; + std::shared_ptr header = std::make_shared(xmlFile, start); + std::shared_ptr init = std::make_shared(xmlFile, start); + lheNode() : xmlNode(){} + lheNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + : xmlNode(originFile, begin, childs){ + xmlFile = originFile; start = begin; children = childs; size_t 
trueStart = originFile.find_first_not_of(" ", begin+1); + if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} + } + bool isModded() override{ return modded; } + bool isModded( bool deep ) override{ + if( !deep ){ return isModded(); } + bool modStat = isModded(); + for( auto child : children ){ modStat = ( modStat || child->isModded( deep ) ); } + for( auto event : events ){ modStat = ( modStat || event->isModded( deep ) ); } + return modStat; + } + void addWgt( size_t index, newWgt& addedWgt ){ + header->addWgt( index, addedWgt.getHeadWgt() ); + auto wgtsVec = addedWgt.getBodyWgts(); + for( int k = 0 ; k < wgtsVec.size() ; ++k ){ + events[k]->addWgt( wgtsVec[k] ); + } + } + void addWgt( size_t index, newWgt& addedWgt, std::string& idTag ){ + header->addWgt( index, addedWgt.getHeadWgt(), idTag ); + auto wgtsVec = addedWgt.getBodyWgts(); + for( int k = 0 ; k < wgtsVec.size() ; ++k ){ + events[k]->addWgt( wgtsVec[k] ); + } + } + protected: + virtual void headerWriter(){ + nodeContent += "\n" + *header->nodeWriter(); + } + virtual void initWriter(){ + nodeContent += *init->nodeWriter(); + } + virtual void eventWriter(){ + for( auto event : events ){ + nodeContent += *event->nodeWriter(); + } + } + void contWriter() override{ + nodeContent = ""; + headerWriter(); + initWriter(); + eventWriter(); + } + void fullWriter() override{ + if( isModded( true ) ){ + headWriter(); + contWriter(); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + written = true; + modded = false; + } else if( !isWritten() ){ + writtenSelf = std::make_shared( xmlFile.substr(start, end - start ) ); + written = true; + } + } + public: + virtual std::shared_ptr nodeWriter() { + if( isModded( true ) || !isWritten() ){ fullWriter(); } + return writtenSelf; + } + }; + + // ZW: function for extracting event information from + // LHE files + std::vector>> valExtraction( const lheNode& lheFile ) + { + 
bool getGs = true; + auto momVec = std::make_shared>(); + auto wgtVec = std::make_shared>(); + auto gVec = std::make_shared>(); + momVec->reserve( lheFile.events.size() * 4 * std::stoi(std::string(lheFile.events[0]->getHead().getNprt())) ); + wgtVec->reserve( lheFile.events.size() ); + gVec->reserve( lheFile.events.size() ); + if( getGs ){ + for( auto event : lheFile.events ) + { + wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); + gVec->push_back( std::sqrt( 4.0 * M_PI * std::stod(std::string( event->getHead().getAQCD() )))); + for( auto prt : event->getPrts() ) + { + momVec->push_back(std::stod(std::string(prt->getE()))); + for( int p = 0 ; p < 3 ; ++p ) + { momVec->push_back(std::stod(std::string(prt->getMom()[p]))); } + } + } + } else{ + for( auto event : lheFile.events ) + { + wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); + gVec->push_back( std::stod(std::string( event->getHead().getAQCD() ))); + for( auto prt : event->getPrts() ) + { + momVec->push_back(std::stod(std::string(prt->getE()))); + for( int p = 0 ; p < 3 ; ++p ) + { momVec->push_back(std::stod(std::string(prt->getMom()[p]))); } + } + + } } + return {momVec, gVec, wgtVec}; + } + + // ZW: fcn for parsing an LHE format event block + // and return a REX format event object + std::shared_ptr evPtrParsor( std::string_view parseFile, size_t& initPos, size_t& endPos ) + { + auto currNode = std::make_shared(parseFile, initPos); + initPos = *nodeStartFind( parseFile, initPos + 1 ); + while( initPos < endPos ) + { + currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); + } + size_t equalSign = parseFile.find_first_of("=>", initPos); + size_t nodeInitEnd = parseFile.find(">", initPos); + while( equalSign < nodeInitEnd ){ + currNode->addTag( xmlTagParser(parseFile, equalSign) ); + } + initPos = *nodeStartFind( parseFile, endPos ); + endPos = *nodeEndFind( parseFile, endPos + 1 ); + return currNode; + } + + // ZW: fcn for parsing an LHE format 
header + // and return a REX format lheHead object + std::shared_ptr lheHeadParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) + { + auto currNode = std::make_shared(parseFile, initPos); + initPos = *nodeStartFind( parseFile, initPos + 1 ); + while( initPos < endPos ) + { + auto nuStrtPos = *nodeStartFind( parseFile, initPos); + currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); + if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "init" ){ continue; } + if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "slha" ){ + auto nuLine = parseFile.find("\n", parseFile.find("<", initPos)); + currNode->setParameters( std::make_shared(currNode->getChildren()[ currNode->getChildren().size() - 1 ]) ); + } + if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "initrwgt" ){ + currNode->setInitRwgt( std::make_shared( currNode->getChildren()[ currNode->getChildren().size() - 1 ] ) ); + } + } + size_t equalSign = parseFile.find("=", initPos); + size_t nodeInitEnd = parseFile.find(">", initPos); + while( equalSign < nodeInitEnd ){ + currNode->addTag( xmlTagParser(parseFile, equalSign) ); + } + initPos = *nodeStartFind( parseFile, endPos ); + endPos = *nodeEndFind( parseFile, endPos + 1 ); + return currNode; + } + + // ZW: fcn for parsing an LHE format file + // and return a REX format LHE node object + std::shared_ptr lheParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) + { + auto currNode = std::make_shared(parseFile, initPos); + initPos = *nodeStartFind( parseFile, initPos + 1 ); + while( initPos < endPos ) + { + auto nuStrtPos = *nodeStartFind( parseFile, initPos); + //if( nuStrtPos == parseFile.find("events.push_back( evPtrParsor( parseFile, initPos, endPos ) ); + // continue; + //} else if( nuStrtPos == parseFile.find("header = lheHeadParser( parseFile, initPos, endPos ); + // continue; + //} else if( nuStrtPos == parseFile.find("init = 
std::make_shared( parseFile, initPos ); + // initPos = *nodeStartFind( parseFile, endPos ); + // endPos = *nodeEndFind( parseFile, *nodeEndFind( parseFile, endPos + 1 ) + 1); + // continue; + //} + if( parseFile.substr( initPos, 6 ) == "events.push_back( evPtrParsor( parseFile, initPos, endPos ) ); + continue; + } else if( parseFile.substr( initPos, 7 ) == "header = lheHeadParser( parseFile, initPos, endPos ); + continue; + } else if( parseFile.substr( initPos, 5 ) == "init = std::make_shared( parseFile, initPos ); + initPos = *nodeStartFind( parseFile, endPos ); + endPos = *nodeEndFind( parseFile, *nodeEndFind( parseFile, endPos + 1 ) + 1); + continue; + } else { + currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); + } + } + size_t equalSign = parseFile.find("=", initPos); + size_t nodeInitEnd = parseFile.find(">", initPos); + while( equalSign < nodeInitEnd ){ + currNode->addTag( xmlTagParser(parseFile, equalSign) ); + } + initPos = *nodeStartFind( parseFile, endPos ); + endPos = *nodeEndFind( parseFile, endPos + 1 ); + return currNode; + } + + // ZW: struct for treating individual HEP + // processes, formatted based on PDG codes + // and the LHE particle status standard + struct lheProc { + public: + std::vector minusOne; + std::vector plusOne; + std::vector minusTwo; + std::vector plusTwo; + std::vector plusThree; + std::vector minusNine; + std::map> valVecs{{"-1", minusOne}, {"1", plusOne}, {"-2", minusTwo}, {"2", plusTwo}, {"3", plusThree}, {"-9", minusNine}}; + lheProc( event& eventNode ) + { + for( auto prt : eventNode.getPrts() ) + { + valVecs[prt->getStatus()].push_back(prt->getPDG()); + } + } + }; + + // ZW: fcn for uploading text files + // to the program, pushing all characters to lowercase + std::shared_ptr filePuller( const std::string& fileLoc ) + { + std::ifstream fileLoad( fileLoc ); + std::stringstream buffer; + buffer << fileLoad.rdbuf(); + auto fileContent = std::make_shared(buffer.str()); + //std::transform( fileContent->begin(), 
fileContent->end(), fileContent->begin(), ::tolower ); + buffer.str(std::string()); + fileLoad.close(); + return fileContent; + } + + // ZW: fcn for saving std::string to disk + bool filePusher( std::string fileLoc, std::string fileCont ) + { + std::ofstream fileWrite( fileLoc ); + if(!fileWrite){return false;} + fileWrite << fileCont; + fileWrite.close(); + return true; + } + + // ZW: fcn for extracting the fill + // process information from an LHE event + std::shared_ptr>> pgdXtract( event& currEv, const std::vector& pdgVec ) + { + auto currProc = std::make_shared>>(); + auto &useProc = *currProc; + for( auto prt : currEv.getPrts() ) + { + useProc[ prt->getStatus() ].push_back(prt->getPDG()); + } + return currProc; + } + + // ZW: fcn for comparing two processes it the + // format output by pgdXtract + bool sameProcString( std::map>& firstVec, std::map>& secVec, const std::vector& pdgVec ) + { + if( firstVec.size() != secVec.size() ){return false;} + for(auto code : pdgVec ) + { + if( firstVec[code] != secVec[code] ){ return false; } + } + return true; + } + + // ZW: fcn for processes in the lheProc struct format + bool procComp( const lheProc& firstProc, const lheProc& secProc, const std::vector& pdgVec ) + { + for( auto stat : pdgVec ) + { + if( firstProc.valVecs.at(stat).size() != secProc.valVecs.at(stat).size() ){ return false; } + if( firstProc.valVecs.at(stat) != secProc.valVecs.at(stat) ){ return false; } + } + return true; + } + + // ZW: fcn for checking whether a list of pdgKtract format + // processes sourceProcList contains a given process newProc + bool procVecContains( std::vector>>>& sourceProcList, + std::map>& newProc, const std::vector& pdgVec ) + { + int noProcs = sourceProcList.size(); + for( auto proc : sourceProcList ) + { + if( sameProcString( *proc, newProc, pdgVec ) ){ return true; } + } + return false; + } + + // ZW: fcn for checking whether a vector of lheProc structs + // procList contains a given lheProc nuProc + bool procListComp( 
const std::vector>& procList, const lheProc& nuProc, const std::vector& pdgVec ) + { + if( procList.size() != 0 ){ + for(auto proc : procList ) + { + if( procComp( *proc, nuProc, pdgVec ) ){ return true; } + } + } + return false; + } + + // ZW: fcn for extracting the different processes + // in a given REX format LHE file in the pdgXtract format + std::vector>>> procExtractor( const lheNode& lheFile ) + { + std::vector>>> procList; + const static std::vector pdgVec = { "-1", "1", "-2", "2", "3", "-9" }; + for( auto event : lheFile.events ) + { + auto currProc = pgdXtract( *event, pdgVec ); + if( procVecContains( procList, *currProc, pdgVec ) ){ continue; } + procList.push_back(currProc); + } + return procList; + } + + // ZW: fcn for extracting the differenty processes + // in a given REX format LHE file in the lheProc format + std::vector> processPull( const lheNode& lheFile ) + { + const static std::vector pdgVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector> procsList{}; + for( auto event : lheFile.events ) + { + auto currProc = std::make_shared( *event ); + if( procListComp( procsList, *currProc, pdgVec ) ){ continue; } + procsList.push_back( currProc ); + } + return procsList; + } + + // ZW: fcn for keeping track of subprocess ordering + // in LHE file + int procPos( const std::vector>& evtSet, lheProc& currProc, + const std::vector& pdgVec ) + { + for( auto k = 0 ; k < evtSet.size() ; ++k ) + { + for( auto stat : pdgVec ) + { + if( evtSet[k]->valVecs[stat] != currProc.valVecs[stat] ){ break; } + } + return k; + } + return evtSet.size(); + } + + // ZW: fcn for extracting the subprocess ordering + // of LHE file + std::vector>> procOrder( const lheNode& lheFile, const std::vector>& evtSet ) + { + const static std::vector pdgVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector>> eventBools( evtSet.size()); + std::vector> pracBools( evtSet.size(), std::vector ( lheFile.events.size() )); + for( auto boolSets : pracBools ){ + std::fill( boolSets.begin(), 
boolSets.end(), false ); + } + for( auto k = 0 ; k < lheFile.events.size() ; ++k ) + { + auto currProc = lheProc(*lheFile.events[k]); + pracBools[ procPos(evtSet, currProc, pdgVec) ][ k ] = true; + } + for( int k = 0 ; k < eventBools.size() ; ++k ) + { + eventBools[k] = std::make_shared>( pracBools[k] ); + } + return eventBools; + } + + // ZW: fcn for reordering LHE file based on subprocess + std::shared_ptr>> eventReOrder( const lheNode& lheFile, std::vector relProc ) + { + auto reOrdered = std::make_shared>>(); + reOrdered->reserve( std::count( relProc.begin(), relProc.end(), true ) ); + for( int k = 0 ; k < relProc.size() ; ++k ) + { + if(!relProc[k]){continue;} + reOrdered->push_back( lheFile.events[k] ); + } + return reOrdered; + } + + // ZW: wrapper for eventReOrder + std::vector>>> lheReOrder( const lheNode& lheFile ) + { + auto procSets = processPull( lheFile ); + auto relProcs = procOrder( lheFile, procSets ); + std::vector>>> ordProcs(procSets.size()); + for( int k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + // ZW: transposed event information struct + struct evtInfo { + public: + std::vector wgts; + std::vector scales; + std::vector aQEDs; + std::vector aQCDs; + std::vector nprts; + std::vector procIDs; + evtInfo( const std::vector>& lheFile = {} ){ + int nEvt = lheFile.size(); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); procIDs.reserve(nEvt); + for( auto evt : lheFile ) + { + wgts.push_back(evt->getHead().getWeight()); + scales.push_back(evt->getHead().getScale()); + aQEDs.push_back(evt->getHead().getAQED()); + aQCDs.push_back(evt->getHead().getAQCD()); + nprts.push_back(evt->getHead().getNprt()); + procIDs.push_back(evt->getHead().getProcID()); + } + } + }; + + // ZW: transposed particle information struct + struct prtInfo { + public: + std::vector moms; + std::vector masses; + std::vector vtims; + std::vector spins; + std::vector 
statuses; + std::vector mothers; + std::vector icols; + std::vector pdgs; + prtInfo( const std::vector>& lheFile = {}, const int nPrt = 8 ){ + int nEvt = lheFile.size(); + moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); + spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); + for( auto evt : lheFile ) + { + for( auto prt : evt->getPrts() ) + { + moms.push_back( prt->getE() ); + masses.push_back( prt->getMass() ); + vtims.push_back( prt->getVTim() ); + spins.push_back( prt->getSpin() ); + statuses.push_back( prt->getStatus() ); + pdgs.push_back( prt->getPDG() ); + for( int k = 0 ; k < 2 ; ++k ) + { + moms.push_back( prt->getMom()[k] ); + mothers.push_back( prt->getMothers()[k] ); + icols.push_back( prt->getColor()[k] ); + } + moms.push_back( prt->getMom()[2] ); + } + } + } + }; + + // ZW: transposed LHE file with a single process type + struct transMonoLHE { + public: + evtInfo evtsHead; + prtInfo evtsData; + transMonoLHE( const std::vector>& lheFile = {}, const int nPrt = 8 ){ + evtsHead = evtInfo(lheFile); + evtsData = prtInfo(lheFile, nPrt); + } + }; + + // ZW: transposed LHE file ordered by subprocess + struct transLHE { + public: + std::string_view xmlFile; + std::vector> subProcs; + transLHE( lheNode& lheFile ) + { + xmlFile = lheFile.getFile(); + auto procsOrdered = lheReOrder( lheFile ); + subProcs = std::vector>( procsOrdered.size() ); + for( int k = 0 ; k < procsOrdered.size() ; ++k ) + { + subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt() ); + } + } + }; + + // ZW: vector transformation string_to_double + std::shared_ptr> vecStoD( const std::vector dataVec ) + { + auto valVec = std::make_shared>( dataVec.size() ); + std::transform( dataVec.begin(), dataVec.end(), valVec->begin(), []( const std::string_view& stv ){ + return std::stod(std::string(stv)); + } ); + return valVec; + } + + // ZW: vector 
// ZW: vector transformation string_to_int
std::shared_ptr<std::vector<int>> vecStoI( const std::vector<std::string_view> dataVec )
{
    auto valVec = std::make_shared<std::vector<int>>( dataVec.size() );
    std::transform( dataVec.begin(), dataVec.end(), valVec->begin(), []( const std::string_view& stv ){
        return std::stoi(std::string(stv));
    } );
    return valVec;
}

// ZW: templated fcn for multiplying two vectors elementwise,
// assuming T has a multiplication operator*;
// the result has the length of the SHORTER input
// ZW(fix): the original swapped so vec1 was the longer vector and then
// iterated over all of vec1 while reading vec2 in lockstep, running past
// the end of vec2 whenever the lengths differed (undefined behaviour);
// now only the overlapping prefix is multiplied
template<typename T>
std::shared_ptr<std::vector<T>> vecElemMult( const std::vector<T>& vec1, const std::vector<T>& vec2 ){
    if( vec1.size() < vec2.size() ){ return vecElemMult( vec2, vec1 ); }
    auto valVec = std::make_shared<std::vector<T>>( vec2.size() );
    std::transform( vec2.begin(), vec2.end(), vec1.begin(), valVec->begin(), []( const T& v1, const T& v2 ){
        return v1 * v2;
    } );
    return valVec;
}

// ZW: bool struct to define which double values
// to extract transposed from LHE file
struct lheRetDs{
public:
    bool ebmup = false;
    bool xsecup = false;
    bool xerrup = false;
    bool xmaxup = false;
    bool xwgtup = false;
    bool scalup = false;
    bool aqedup = false;
    bool aqcdup = false;
    bool pup = true;
    bool mass = false;
    bool vtimup = false;
    bool spinup = false;
    // returns the flags in the fixed order consumed by lheValDoubles
    std::vector<bool> getBools(){
        return { ebmup, xsecup, xerrup, xmaxup, xwgtup, scalup, aqedup, aqcdup,
    pup, mass, vtimup, spinup };
    }
};

// ZW: bool struct to define which int values
// to extract transposed from LHE file
struct lheRetInts{
public:
    //bool maxpup = false;
    bool idbmup = false;
    bool pdfgup = false;
    bool pdfsup = false;
    bool idwtup = false;
    bool nprup = false;
    bool lprup = false;
    //bool maxnup = false;
    bool nup = true;
    bool idprup = false;
    bool idup = true;
    bool istup = true;
    bool mothup = false;
    bool icolup = false;
    // returns the flags in the fixed order consumed by lheValInts
    std::vector<bool> getBools(){
        return { idbmup, pdfgup, pdfsup, idwtup, nprup, lprup,
    nup, idprup, idup, istup, mothup, icolup };
    }
};
lheValDoubles( lheNode& lheFile, lheRetDs vals = lheRetDs() ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.init->getHead()->ebmup[0], lheFile.init->getHead()->ebmup[1] } ); ++currInd; } + if( boolVec[1] ){ + std::vector xsecVec( lheFile.init->getLines().size() ); + for( auto line : lheFile.init->getLines() ) + { + xsecVec.push_back(line->xsecup); + } + lheDs[currInd] = vecStoD( xsecVec ); + ++currInd; } + if( boolVec[2] ){ + std::vector xerrVec( lheFile.init->getLines().size() ); + for( auto line : lheFile.init->getLines() ) + { + xerrVec.push_back(line->xerrup); + } + lheDs[currInd] = vecStoD( xerrVec ); + ++currInd; } + if( boolVec[3] ){ + std::vector xmaxVec( lheFile.init->getLines().size() ); + for( auto line : lheFile.init->getLines() ) + { + xmaxVec.push_back(line->xmaxup); + } + lheDs[currInd] = vecStoD( xmaxVec ); + ++currInd; } + for( int k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. 
* M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + + return lheDos; + } + + // ZW: function for extracting transposed int values + // from LHE file + std::shared_ptr>>> lheValInts( lheNode& lheFile, lheRetInts vals = lheRetInts() ) + { + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile ); + auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheIs; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->idbmup[0], lheFile.init->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->pdfgup[0], lheFile.init->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->pdfsup[0], lheFile.init->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->nprup } ); ++currInd; } + if( boolVec[5] ){ + std::vector lprVec( lheFile.init->getLines().size() ); + for( auto line : lheFile.init->getLines() ) + { + lprVec.push_back(line->lprup); + } + lheDs[currInd] = vecStoI( lprVec ); + ++currInd; } + for( int k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } + if( boolVec[8] 
){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } + } + return lheIs; + } +} \ No newline at end of file diff --git a/tools/REX/pepper.cu b/tools/REX/pepper.cu new file mode 100644 index 0000000000..b49c20fb16 --- /dev/null +++ b/tools/REX/pepper.cu @@ -0,0 +1,169 @@ +#include "PEPPER.hpp" +#include "fbridge.cc" +#include +#include + +struct fbridgeRunner{ + std::vector rndHel; + std::vector rndCol; + std::vector selHel; + std::vector selCol; + CppObjectInFortran *fBridge; + const unsigned int chanId = 0; + const unsigned int nMom = 4; + unsigned int nEvt; + unsigned int nPar; + fbrideRunner(){} + fbridgeRunner( PEP::lheNode& lheFile ){ + if( !lheFile.isParsed() ){ lheFile.deepParse(); } + nEvt = lheFile.events.size(); + rndHel = std::vector( nEvt, 0. ); + rndCol = std::vector( nEvt, 0. ); + selHel = std::vector( nEvt, 0 ); + selCol = std::vector( nEvt, 0 ); + nPar = lheFile.events[0]->getPrts().size(); + } + fbridgeRunner( std::shared_ptr lheFile ){ + if(!lheFile->isParsed() ){ lheFile->deepParse(); } + nEvt = lheFile->events.size(); + rndHel = std::vector( nEvt, 0. ); + rndCol = std::vector( nEvt, 0. 
); + selHel = std::vector( nEvt, 0 ); + selCol = std::vector( nEvt, 0 ); + nPar = lheFile->events[0]->getPrts().size(); + } + std::shared_ptr> scatAmp( std::shared_ptr> momenta, std::shared_ptr> alphaS ){ + std::shared_ptr> evalScatAmps( nEvt ); + fbridgecreate_( &fBridge, &nEvt, &nPar, &nMom ); + fbridgesequence_( &fBridge, &momenta->at(0), &alphaS->at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgedelete_( &fBridge ); + return evalScatAmps; + } + std::shared_ptr> scatAmp( std::shared_ptr> momenta, std::shared_ptr> alphaS ){ + if( typeid(FORTRANFPTYPE(0)) == typeid(float(0)) ){ + std::shared_ptr> nuMom( nEvt ); + std::shared_ptr> nuAlphaS( nEvt ); + std::transform( momenta->begin(), momenta->end(), nuMom->begin(), [](double mom){ return static_cast(mom); }) + std::transform( alphaS->begin(), alphaS->end(), nuAlphaS->begin(), [](double gs){ return static_cast(gs); }); + return scatAmp( nuMom, nuAlphaS ); + } + std::shared_ptr> evalScatAmps( nEvt ); + fbridgecreate_( &fBridge, &nEvt, &nPar, &nMom ); + fbridgesequence_( &fBridge, &momenta->at(0), &alphaS->at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgedelete_( &fBridge ); + return evalScatAmps; + } + std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ + std::shared_ptr> evalScatAmps( nEvt ); + fbridgecreate_( &fBridge, &nEvt, &nPar, &nMom ); + fbridgesequence_( &fBridge, &momenta[0], &alphaS[0], &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgedelete_( &fBridge ); + return evalScatAmps; + } + std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ + if( typeid(FORTRANFPTYPE(0)) == typeid(float(0)) ){ + auto nuMom = std::vector( nEvt ); + auto nuAlphaS = std::vector( nEvt ); + std::transform( momenta.begin(), momenta.end(), nuMom.begin(), [](double mom){ return static_cast(mom); }) + std::transform( alphaS.begin(), alphaS.end(), nuAlphaS.begin(), 
[](double gs){ return static_cast(gs); }); + return scatAmp( nuMom, nuAlphaS ); + } + auto evalScatAmps = std::shared_ptr>( nEvt ); + fbridgecreate_( &fBridge, &nEvt, &nPar, &nMom ); + fbridgesequence_( &fBridge, &momenta[0], &alphaS[0], &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgedelete_( &fBridge ); + return evalScatAmps; + } +}; + +std::shared_ptr> meEval( std::vector& x, std::vector& y){ + int random = rand() % 10; + if( random == 0 ){ random = 11; } + auto thisIsIt = std::make_shared>( y.size(), random ); + return thisIsIt; +} + +int usage( char* argv0, int ret = 1 ) +{ + std::cout << "Usage: " << argv0 + << " [--lhefile=\"/YOUR/PATH/HERE\"|-lhe=\"/YOUR/PATH/HERE\"] [--rwgtcard=/YOUR/PATH/HERE|-rwgt=\"/YOUR/PATH/HERE\"]\n" + << "[--output=/YOUR/PATH/HERE\"|-out=\"/YOUR/PATH/HERE\"]\n"; + std::cout << "\n"; + std::cout << "The LHE file path should be with respect to the directory you are running\n"; + std::cout << "this program from, and similarly the rwgt_card should be as well.\n"; + if( typeid(FORTRANFPTYPE(0)) == typeid(double(0)) ){ + std::cout << "The program is currently compiled with double precision.\n"; + } else if( typeid(FORTRANFPTYPE(0)) == typeid(float(0)) ){ + std::cout << "The program is currently compiled with float precision.\n"; + } else{ std::cout << "The program is currently compiled with an unrecognised precision -- FPTYPE is neither float nor double.\n"; } + std::cout << "Numerical precision can only be redefined at compile time.\nIf you wish to change the precision, please recompile with the option \"FPTYPE=f\"/\"FPTYPE=d\"."; + return ret; +} + + +int main( int argc, char** argv ){ + std::string lheFilePath; + std::string rwgtCardPath; + std::string outputPath; + std::string slhaPath; + + // READ COMMAND LINE ARGUMENTS + for( auto arg : argv ) + { + auto currArg = std::string( arg ); + if( currArg.substr(0,9) == "--lhefile" || currArg.substr(0,4) == "-lhe" ) + { + lheFilePath = 
currArg.substr( currArg.find( "=" ) + 1 ); + } + else if( currArg.substr(0,10) == "--rwgtcard" || currArg.substr(0,5) == "-rwgt" ) + { + rwgtCardPath = currArg.substr( currArg.find( "=" ) + 1 ); + } else if( currArg.substr(0,8) == "--output" || currArg.substr(0,4) == "-out" ){ + outputPath = currArg.substr( currArg.find( "=" ) + 1 ); + } else + { + return usage( argv[0] ); + } + } + + if( lheFilePath.empty() || rwgtCardPath.empty() ){ + return usage( argv[0] ); + } + + std::string currPath = argv[0]; + + size_t slashPos = currPath.find_last_of( "/" ); + bool onWindows = false; + if( slashPos == std::string::npos ){ slashPos = currpath.find_last_of( "\\" ); onWindows = true; } + if( slashPos == std::string::npos ) + throw std::runtime_error( "Failed to determine current working directory -- need to know where program is run from to identify where to pull and push param_card.dat." ); + + if( onWindows ){ + if( currPath.substr( currPath.find_last_of("\\", slashPos - 1) + 1, 2 ) == "P1" ){ + slhaPath = "..\\..\\Cards\\param_card.dat"; + } else{ + slhaPath = "\\Cards\\param_card.dat"; + } + } else { + if( currPath.substr( currPath.find_last_of("/", slashPos - 1) + 1, 2 ) == "P1" ){ + slhaPath = "../../Cards/param_card.dat"; + } else { + slhaPath = "/Cards/param_card.dat"; + } + } + + + PEP::PER::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); + fileCol.initCards(); + + auto bridgeCont = fbridgeRunner( fileCol.getLhe() ); + + std::function>( std::vector&, std::vector& )> scatteringAmplitude = bridgeCont.scatAmp; + PEP::PER::rwgtRunner nuRun( fileCol, scatteringAmplitude ); + + + nuRun.runRwgt( outputPath ); + + return 0; + +} \ No newline at end of file diff --git a/tools/REX/teawREX.hpp b/tools/REX/teawREX.hpp new file mode 100644 index 0000000000..5c2eb2d3cd --- /dev/null +++ b/tools/REX/teawREX.hpp @@ -0,0 +1,470 @@ +/*** + * _ ______ _______ __ + * | | | ___ \ ___\ \ / / + * | |_ ___ __ ___ _| |_/ / |__ \ V / + * | __/ _ \/ _` \ \ /\ / / /| __| / \ + * | || 
__/ (_| |\ V V /| |\ \| |___/ /^\ \ + * \__\___|\__,_| \_/\_/ \_| \_\____/\/ \/ + * + ***/ + +// THIS IS NOT A LICENSED RELEASE +// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD +// FROM AN IMPROPER RELEASE. + +// Copyright © 2023 CERN, CERN Author Zenny Wettersten. +// All rights reserved. + +#include +#include +#include +#include +#include +#include +#include "REX.hpp" + +namespace REX::teaw +{ + template + std::shared_ptr> scatAmpEval(std::vector& momenta, std::function>(std::vector&)> evalFunc) + { return evalFunc(momenta); } + + template + std::shared_ptr> scatAmpEval(std::vector& momenta, std::function(std::vector&)> evalFunc) + { return evalFunc(momenta); } + + template + std::shared_ptr> scatAmpEval(std::vector& momenta, std::function>(std::vector&, std::vector&)> evalFunc) + { return evalFunc(momenta); } + + template + std::shared_ptr> scatAmpEval(std::vector& momenta, std::function(std::vector&, std::vector&)> evalFunc) + { return evalFunc(momenta); } + + struct rwgtVal : REX::paramVal{ + public: + std::string_view blockName; + bool allStat; + bool isAll(){ return (idStr == "all"); } + rwgtVal() : paramVal(){ return; } + rwgtVal( std::string_view paramLine ) + : paramVal( paramLine, false ){if( paramLine.size() == 0 ){ return; } + realLine = paramLine; + auto vals = *REX::nuBlankSplitter( realLine ); + blockName = vals[1]; + idStr = vals[2]; + valStr = vals[3]; + } + std::string_view getLine(){ return realLine; } + void outWrite( REX::paramBlock& srcBlock ){ + if ( isAll() ) + { + for( auto param : srcBlock.params ) + { + param.valStr = valStr; + param.modded = true; + } + return; + } + auto currPar = std::find_if( srcBlock.params.begin(), srcBlock.params.end(), + [&]( const REX::paramVal& parPar ){ return (parPar.idStr == idStr ); } ); + if( currPar == srcBlock.params.end() ){ + srcBlock.params.push_back( REX::paramVal( realLine.substr(realLine.find("set") + 4) ) ); + srcBlock.params[ srcBlock.params.size() - 1 ].modded = true; + srcBlock.modded = true; + 
return; + } + currPar->valStr = valStr; + currPar->modded = true; + srcBlock.modded = true; + return; + } + }; + + struct rwgtBlock { + public: + std::string_view name; + std::vector rwgtVals; + rwgtBlock( std::vector values = {}, std::string_view title = "" ) + { + name = title; + rwgtVals.resize( values.size() ); + for( int k = 0 ; k < values.size() ; ++k ) + { + rwgtVals[k] = rwgtVal( values[k] ); + } + } + rwgtBlock( const std::vector& vals, std::string_view title = "" ) + { + name = title; + rwgtVals = vals; + } + std::string_view getBlock(){ + if( written ){ return runBlock; } + runBlock = ""; + for( auto val : rwgtVals ){ + runBlock += std::string(val.getLine()) + "\n"; + } + written = true; + return runBlock; + } + void outWrite( REX::paramBlock& srcBlock, const std::map& blocks ) + { + for( auto parm : rwgtVals ) + { + parm.outWrite( srcBlock ); + } + srcBlock.modded = true; + return; + } + protected: + std::string runBlock; + bool written = false; + }; + + struct rwgtProc { + public: + std::vector rwgtParams; + std::string_view procString; + std::string_view rwgtName; + std::vector rwgtOpts; + void parse(){ + std::vector blocks; + std::vector>> params; + auto procLines = *REX::nuLineSplitter( procString ); + for( auto line : procLines ) + { + auto strtPt = line.find("set"); + auto words = *REX::nuWordSplitter( line ); + auto currBlock = words[1]; + auto loc = std::find_if( blocks.begin(), blocks.end(), + [&]( std::string_view block ){ return (block == currBlock); } ); + if( loc == blocks.end() ){ + blocks.push_back( currBlock ); + params.push_back( std::make_shared>( std::vector({rwgtVal( line )} ) )); } + else { + params[ std::distance( blocks.begin(), loc ) - 1 ]->push_back( rwgtVal( line ) ); + } + } + rwgtParams.reserve(blocks.size()); + for( int k = 0 ; k < blocks.size() ; ++k ) + { + rwgtParams.push_back( rwgtBlock( *params[k], blocks[k] ) ); + } + } + rwgtProc( REX::lesHouchesCard slhaSet, std::string_view rwgtSet = "", bool parseOnline = false ) + 
{ + if( rwgtSet == "" ){ return; } + auto strtLi = rwgtSet.find( "\n", rwgtSet.find("launch") ) + 1; + auto endLi = rwgtSet.find("\n", strtLi); + while( rwgtSet[rwgtSet.find_first_not_of("\n ", endLi)] == 's' ) + { endLi = rwgtSet.find( "\n", endLi + 1 ); } + procString = rwgtSet.substr( strtLi, endLi - strtLi ); + if( parseOnline ){ parse(); } + } + std::shared_ptr outWrite( const REX::lesHouchesCard& paramOrig ){ + auto slhaOrig = std::make_shared( paramOrig ); + std::map blockIds; + for( int k = 0 ; k < slhaOrig->blocks.size() ; ++k ) + { slhaOrig->blocks[k].parse( true ); + auto nyama = std::pair( slhaOrig->blocks[k].name, k); + blockIds.insert( nyama ); } + for( auto rwgts : rwgtParams ) + { rwgts.outWrite( slhaOrig->blocks[ blockIds.at( rwgts.name ) ], blockIds ); } + slhaOrig->modded = true; + return slhaOrig; + } + std::string_view comRunProc(){ return procString; } + }; + + struct rwgtCard{ + public: + REX::lesHouchesCard slhaCard; + std::vector rwgtRuns; + std::vector rwgtProcs; + std::vector opts; + std::vector rwgtNames; + std::string_view srcCard; + void parse( bool parseOnline = false ) { + auto strt = srcCard.find("launch"); + while( auto commPos = srcCard.find_last_of("#", strt) > srcCard.find_last_of("\n", strt) ){ + if( commPos == REX::npos ){ + break; + } + strt = srcCard.find("launch", strt + 6 ); + } + while( auto chPos = srcCard.find( "set" ) < strt ){ + if( srcCard.find_last_of("#", chPos) > srcCard.find_last_of("\n", chPos) ){ chPos = srcCard.find("change", strt + 6 ); continue; } + opts.push_back( srcCard.substr( chPos, srcCard.find("\n", chPos) - chPos ) ); + } + std::vector lnchPos({strt}); + auto nuLnch = srcCard.find( "launch", strt + 6 ); + while ( nuLnch != std::string_view::npos ) + { + if( srcCard.find_last_of("#", nuLnch) < srcCard.find_last_of("\n", nuLnch) ){ lnchPos.push_back(nuLnch); } + nuLnch = srcCard.find( "launch", nuLnch + 6 ); + } + for( int k = 0 ; k < lnchPos.size() - 1 ; ++k ) + { + auto strtLi = srcCard.find( "set", 
lnchPos[k] ); + rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( strtLi, lnchPos[k+1] - strtLi ), parseOnline ) ); + if( srcCard.find( "--", lnchPos[k] ) < strtLi ){ + auto strtPos = srcCard.find( "--", lnchPos[k] ); + while( (strtPos < strtLi ) && (strtPos!= std::string_view::npos) ){ + auto nuStrtPos = std::min( srcCard.find( "\n", strtPos ), srcCard.find( "--", strtPos + 1 )); + rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.push_back( srcCard.substr( strtPos, nuStrtPos - strtPos ) ); + if( rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr(2,11) == "rwgt_name"){ + rwgtRuns[ rwgtRuns.size() - 1 ].rwgtName = rwgtRuns[ rwgtRuns.size() - 1 ]. + rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr( 11, nuStrtPos - strtPos - 11 ); + } + if( nuStrtPos == srcCard.find( "\n", strtPos ) ){ break; } + strtPos = nuStrtPos; + } + } + } + size_t endLi = srcCard.find( "\n", lnchPos[ lnchPos.size() - 1 ] ); + if( srcCard.substr( endLi + 1, 3 ) == "set" ){ + while( srcCard.substr( endLi + 1, 3 ) == "set" ) + { + endLi = srcCard.find( "\n", endLi + 1 ); + } + rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( lnchPos[lnchPos.size()-1], endLi - lnchPos[lnchPos.size()-1] ), parseOnline ) ); + } + rwgtProcs = std::vector(); rwgtProcs.reserve( rwgtRuns.size() ); + rwgtNames.reserve( rwgtRuns.size() ); + int p = 1; + for( auto run : rwgtRuns ){ + rwgtProcs.push_back( run.comRunProc() ); + if( run.rwgtName == "" ){ + rwgtNames.push_back( "rwgt_" + std::to_string( p++ ) ); + } else { + rwgtNames.push_back( std::string(run.rwgtName) ); + } + } + } + rwgtCard( std::string_view reweight_card ){ + srcCard = reweight_card; + } + rwgtCard( std::string_view reweight_card, REX::lesHouchesCard slhaParams, bool parseOnline = false ){ + srcCard = reweight_card; + slhaCard = slhaParams; + if( parseOnline ){ parse( parseOnline ); } + } + std::vector> writeCards( REX::lesHouchesCard& slhaOrig ){ + std::vector> cardVec; + 
slhaOrig.parse(); + cardVec.reserve( rwgtRuns.size() ); + for( auto rwgt : rwgtRuns ) + { + cardVec.push_back( rwgt.outWrite( slhaOrig ) ); + } + return cardVec; + } + }; + + struct rwgtCollection { + public: + void setRwgt( std::shared_ptr rwgts ){ + if( rwgtSet ){ return; } + rwgtSets = rwgts; + rwgtSet = true; + } + void setRwgt( rwgtCard rwgts ){ + if( rwgtSet ){ return; } + setRwgt( std::make_shared( rwgts ) ); rwgtSet = true; + } + void setSlha( std::shared_ptr slha ){ + if( slhaSet ){ return; } + slhaParameters = slha; + slhaParameters->parse(); + slhaSet = true; + } + void setSlha( REX::lesHouchesCard slha ){ + if( slhaSet ){ return; } + setSlha( std::make_shared( slha ) ); + slhaSet = true; + } + void setLhe( std::shared_ptr lhe ){ + if( lheFileSet ){ return; } + lheFile = lhe; + lheFileSet = true; + } + void setLhe( REX::lheNode lhe ){ + if( lheFileSet ){ return; } + setLhe( std::make_shared( lhe ) ); + lheFileSet = true; + } + void setLhe( std::string_view lhe_file ){ + if( lheFileSet ){ return; } + size_t strt = 0; + size_t post = *REX::nodeEndFind( lhe_file, strt ); + lheFile = REX::lheParser( lhe_file, strt, post ); + lheFileSet = true; + } + std::shared_ptr getRwgt(){ return rwgtSets; } + std::shared_ptr getSlha(){ return slhaParameters; } + std::shared_ptr getLhe(){ return lheFile; } + rwgtCollection(){ return; } + rwgtCollection( std::shared_ptr lhe, std::shared_ptr slha, std::shared_ptr rwgts ){ + setLhe( lhe ); + setSlha( slha ); + setRwgt( rwgts ); + } + protected: + void setDoubles(){ + if( lheFile == nullptr || rwgtSets == nullptr || slhaParameters == nullptr ) + throw std::runtime_error( "One or more of the necessary files (SLHA parameter card, LHE event storage file, and MadGraph-format reweight card) have not been initialised." 
); + REX::lheRetDs returnBools; returnBools.xwgtup = true; returnBools.aqcdup = true; returnBools.pup = true; + auto vecOfVecs = REX::lheValDoubles( *lheFile, returnBools ); + if( vecOfVecs->size() != 3 ) + throw std::runtime_error( "LHE file appears to contain multiple types of processes. This has not yet been implemented." ); + wgts = vecOfVecs->at( 0 ); gS = vecOfVecs->at( 1 ); momenta = vecOfVecs->at( 2 ); + } + std::shared_ptr rwgtSets; + std::shared_ptr slhaParameters; + std::shared_ptr lheFile; + std::shared_ptr> wgts; + std::shared_ptr> gS; + std::shared_ptr> momenta; + bool lheFileSet = false; + bool slhaSet = false; + bool rwgtSet = false; + }; + + struct rwgtFiles : rwgtCollection { + void setRwgtPath( std::string_view path ){ rwgtPath = path; } + void setSlhaPath( std::string_view path ){ slhaPath = path; } + void setLhePath( std::string_view path ){ lhePath = path; } + rwgtFiles() : rwgtCollection(){ return; } + rwgtFiles( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ) : rwgtCollection(){ + setRwgtPath( reweight_card ); + setSlhaPath( slha_card ); + setLhePath( lhe_card ); + } + void initCards(){ + if( rwgtPath == "" || slhaPath == "" || lhePath == "" ) + throw std::runtime_error( "Paths to reweight card, parameter card, or LHE file have not been set" ); + pullRwgt(); pullSlha(); pullLhe(); + setLhe( *lheCard ); + setSlha( std::make_shared( *slhaCard ) ); + setRwgt( std::make_shared( *rewgtCard, *slhaParameters, true ) ); + setDoubles(); + } + void initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ){ + setLhePath( lhe_card ); + setSlhaPath( slha_card ); + setRwgtPath( reweight_card ); + initCards(); + } + protected: + void pullRwgt(){ + rewgtCard = REX::filePuller( rwgtPath ); + } + void pullSlha(){ + slhaCard = REX::filePuller( slhaPath ); + } + void pullLhe(){ + lheCard = REX::filePuller( lhePath ); + } + std::string rwgtPath; + std::string lhePath; + std::string 
slhaPath; + std::shared_ptr lheCard; + std::shared_ptr slhaCard; + std::shared_ptr rewgtCard; + }; + + struct rwgtRunner : rwgtFiles{ + public: + void setMeEval( std::function>(std::vector&, std::vector&)> eval ){ meEval = eval; meInit = true; } + rwgtRunner() : rwgtFiles(){ return; } + rwgtRunner( rwgtFiles& rwgts ) : rwgtFiles( rwgts ){ return; } + rwgtRunner( rwgtFiles& rwgts, std::function>(std::vector&, std::vector&)> meCalc ) : rwgtFiles( rwgts ){ + meEval = meCalc; + meInit = true; + } + rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + std::function>(std::vector&, std::vector&)> meCalc ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ + meEval = meCalc; + meInit = true; + } + protected: + bool meInit = false; + bool meSet = false; + bool normWgtSet = false; + std::function>(std::vector&, std::vector&)> meEval; + std::shared_ptr> initMEs; + std::shared_ptr> meNormWgts; + std::shared_ptr rwgtGroup; + void setMEs(){ + initCards(); + if( !meInit ) + throw std::runtime_error( "No function for evaluating scattering amplitudes has been provided." ); + auto ins = meEval( *momenta, *gS ); + initMEs = std::make_shared>( ins->begin(), ins->begin() + wgts->size() ); + meSet = true; + } + bool setParamCard( std::shared_ptr slhaParams ){ + if( slhaPath == "" ) + throw std::runtime_error( "No parameter card path has been provided." ); + if( slhaParameters == nullptr ) + throw std::runtime_error( "No SLHA parameter card has been provided." ); + if( !REX::filePusher( slhaPath, *slhaParams->selfWrite() ) ) + throw std::runtime_error( "Failed to overwrite parameter card." ); + return true; + } + void setNormWgts(){ + if( !meSet ){ setMEs(); } + if( initMEs->size() != wgts->size() ) + throw std::runtime_error( "Inconsistent number of events and event weights." 
); + meNormWgts = std::make_shared>( wgts->size() ); + for( size_t k = 0; k < initMEs->size(); k++ ){ + meNormWgts->at( k ) = wgts->at( k ) / initMEs->at( k ); + } + normWgtSet = true; + } + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId ){ + if( !normWgtSet ) + throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); + if( !setParamCard( slhaParams ) ) + throw std::runtime_error( "Failed to rewrite parameter card." ); + auto newMEs = meEval( *momenta, *gS ); + auto newWGTs = REX::vecElemMult( *newMEs, *meNormWgts ); + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); + lheFile->addWgt( 0, nuWgt ); + return true; + } + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, std::string& id ){ + if( !normWgtSet ) + throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); + if( !setParamCard( slhaParams ) ) + throw std::runtime_error( "Failed to rewrite parameter card." ); + auto newMEs = meEval( *momenta, *gS ); + auto newWGTs = REX::vecElemMult( *newMEs, *meNormWgts ); + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); + lheFile->addWgt( 0, nuWgt ); + return true; + } + bool lheFileWriter( std::shared_ptr lheFile, std::string outputDir = "rwgt_evts.lhe" ){ + bool writeSuccess = REX::filePusher( outputDir, *lheFile->nodeWriter() ); + if( !writeSuccess ) + throw std::runtime_error( "Failed to write LHE file." 
); + return true; + } + public: + void runRwgt( const std::string& output ){ + setMEs(); + setNormWgts(); + rwgtGroup = std::make_shared(); + auto currInd = lheFile->header->addWgtGroup( rwgtGroup ); + auto paramSets = rwgtSets->writeCards( *slhaParameters ); + for( int k = 0 ; k < paramSets.size(); k++ ){ + singleRwgtIter( paramSets[k], lheFile, k, rwgtSets->rwgtNames[k] ); + std::cout << "."; + } + lheFileWriter( lheFile, output ); + REX::filePusher( slhaPath, *slhaCard ); + std::cout << "\nReweighting done.\n"; + } + }; +} \ No newline at end of file From 15a2a650f3ec10b4cfff8e2095759e8c856ee397 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Tue, 27 Feb 2024 09:19:24 +0100 Subject: [PATCH 10/76] major changes to REX and teawREX, plus first base for template runfiles for MG reweighting --- tools/REX/REX.hpp | 1923 +++++++++++++++++++++++++++++++++---- tools/REX/rwgt_driver.cc | 115 +++ tools/REX/rwgt_instance.h | 69 ++ tools/REX/rwgt_runner.cc | 134 +++ tools/REX/teawREX.hpp | 247 ++++- 5 files changed, 2232 insertions(+), 256 deletions(-) create mode 100644 tools/REX/rwgt_driver.cc create mode 100644 tools/REX/rwgt_instance.h create mode 100644 tools/REX/rwgt_runner.cc diff --git a/tools/REX/REX.hpp b/tools/REX/REX.hpp index 703f799d95..c97f3e6a27 100644 --- a/tools/REX/REX.hpp +++ b/tools/REX/REX.hpp @@ -12,9 +12,12 @@ // IF YOU SEE THIS FILE, IT HAS BEEN SPREAD // FROM AN IMPROPER RELEASE. -// Copyright © 2023 CERN, CERN Author Zenny Wettersten. +// Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. // All rights reserved. 
+#ifndef _REX_HPP_ +#define _REX_HPP_ + #include #include #include @@ -28,7 +31,10 @@ #include #include #include +#include #include +#include +#include // ZW: all fcns within the REX standard sit in the // namespace REX @@ -40,6 +46,69 @@ namespace REX #pragma warning( disable : 4101) static const size_t npos = -1; #pragma warning( pop ) + + using sortFcn = std::function>(std::vector)>; + using statSort = std::function>(std::string_view, std::vector)>; + + // ZW: index sorting function, which returs vector + // of the indices of the original vector sorted + // by default in ascending order + // ie, for [5.0, 0.25, 2.0, 9.2] returns [1, 2, 0, 3] + template + std::shared_ptr> indSort(const std::vector &vector, std::function comp = std::less()) + { + auto sorted = std::make_shared>(vector.size()); + std::iota(sorted->begin(), sorted->end(), 0); + std::stable_sort(sorted->begin(), sorted->end(), [&](size_t i, size_t j) { return comp(vector[i], vector[j]); }); + return sorted; + } + + // ZW: wrapper for indSort for comparing string-type arguments representing integers + template + std::shared_ptr> stoiSort(const std::vector &vector) + { + std::function stoicomp = [](const T& i, const T& j) { return std::stoi(std::string(i)) < std::stoi(std::string(j)); }; + return indSort(vector, stoicomp); + } + + // ZW: wrapper for indSort for comparing string-type arguments representing doubles + template + std::shared_ptr> stodSort(const std::vector &vector) + { + std::function stodcomp = [](const T& i, const T& j) { return std::stod(std::string(i)) < std::stod(std::string(j)); }; + return indSort(vector, stodcomp); + } + + // ZW: templated fcn for finding the order of elements in a vector to_sort + // based on their order in a reference vector reference + // Elements not found in reference are represented by npos, + // including if to_sort is longer than reference + template + std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort) { + 
std::unordered_map> indexMap; + + // Populate indexMap with indices from vec1 + for (size_t i = 0; i < reference.size(); ++i) { + indexMap[reference[i]].push(i); + } + + std::shared_ptr> order; + order->reserve(to_sort.size()); // Pre-allocate memory + + for (const auto& elem : to_sort) { + auto it = indexMap.find(elem); + if (it != indexMap.end() && !it->second.empty()) { + order->push_back(it->second.front()); + it->second.pop(); + } else { + // Element in vec2 not found in vec1 + order->push_back(npos); + } + } + + return order; + } + // ZW: minimal fcn for counting the amount of times // a given search term appears in a string int nuStrCount( std::string_view searchString, std::string_view searchTerm ) @@ -79,7 +148,7 @@ namespace REX auto lineBreaks = nuFindEach( currEvt, "\n" ); std::vector trueBreaks; trueBreaks.reserve( lineBreaks->size() ); - for( int k = 0 ; k < lineBreaks->size() - 1 ; ++k ) + for( size_t k = 0 ; k < lineBreaks->size() - 1 ; ++k ) { if( int( (*lineBreaks)[k+1] - (*lineBreaks)[k]) == 1){continue;} trueBreaks.push_back( (*lineBreaks)[k] ); @@ -92,7 +161,7 @@ namespace REX splitLines->push_back( currEvt.substr( startPos + 1, k - startPos - 1) ); startPos = k; } - if( auto strung = currEvt.substr( startPos ).size() > 1 ){ splitLines->push_back( currEvt.substr( startPos ) ); } + if( currEvt.substr( startPos ).size() > 1 ){ splitLines->push_back( currEvt.substr( startPos ) ); } return splitLines; } @@ -105,7 +174,7 @@ namespace REX auto lineBreaks = nuFindEach( currEvt.substr( startPos, endPos - startPos), "\n" ); auto truBreaks = std::make_shared>(); truBreaks->reserve( lineBreaks->size() ); - for( int k = 0 ; k < lineBreaks->size() ; ++k ) + for( size_t k = 0 ; k < lineBreaks->size() ; ++k ) { if( int( (*lineBreaks)[k+1] - (*lineBreaks)[k]) == 1){continue;} truBreaks->push_back( (*lineBreaks)[k] ); @@ -237,26 +306,46 @@ namespace REX // ZW: fcn for finding left angle bracket // indicating the start of a new node in an XML file - 
std::shared_ptr nodeStartFind( std::string_view parseFile, size_t strtPos ) + size_t nodeStartFind( std::string_view parseFile, size_t strtPos ) { - auto retPtr = std::make_shared(parseFile.find("<", strtPos)); - while( parseFile[*retPtr + 1] == '!' || parseFile[*retPtr +1] == '/' || parseFile[*retPtr +1] == '?' ){ - *retPtr = parseFile.find("<", *retPtr +1); + auto retPtr = parseFile.find("<", strtPos); + while( parseFile[retPtr + 1] == '!' || parseFile[retPtr +1] == '/' || parseFile[retPtr +1] == '?' ){ + retPtr = parseFile.find("<", retPtr +1); } return retPtr; } + size_t endNodeStartFind( std::string_view parseFile, size_t strtPos ) + { + return parseFile.find(">", nodeStartFind( parseFile, strtPos )); + } + + std::pair startNodePts( std::string_view parseFile, size_t strtPos ) + { + return { nodeStartFind( parseFile, strtPos ), endNodeStartFind( parseFile, strtPos ) }; + } + // ZW: fcn for finding left angle bracket // indicating an end of a node in an XML file - std::shared_ptr nodeEndFind( std::string_view parseFile, size_t strtPos ) - { - auto retPtr = std::make_shared(parseFile.find("<", strtPos)); - while( parseFile[*retPtr + 1] != '/' ){ - *retPtr = parseFile.find("<", *retPtr +1); - } + size_t nodeEndFind( std::string_view parseFile, size_t strtPos ) + { + auto retPtr = parseFile.find("<", strtPos); + while( parseFile[retPtr + 1] != '/' ){ + retPtr = parseFile.find("<", retPtr +1); + } return retPtr; } + size_t endNodeEndFind( std::string_view parseFile, size_t strtPos ) + { + return parseFile.find(">", nodeEndFind( parseFile, strtPos )); + } + + std::pair endNodePts( std::string_view parseFile, size_t strtPos ) + { + return { nodeEndFind( parseFile, strtPos ), endNodeEndFind( parseFile, strtPos ) }; + } + // ZW: struct for handling tags in XML node opening tags struct xmlTag { public: @@ -291,18 +380,153 @@ namespace REX return tagPtr; } + // ZW: struct for handling the tree structure of XML files, + // essentially just giving the positions of the 
beginning and + // end of each node s.t. the proper node structures can accurately + // detail where children begin and end while allowing for personal + // content between child nodes + struct xmlTree { + public: + xmlTree(){ return; } + xmlTree( std::string_view file ){ + origin = file; + children = std::make_shared>>(); + start = file.find_first_not_of(" \n\r\f\t\v"); + if( file.compare(start, 1, "<") != 0 ) { + faux = true; + contSt = start; + end = std::min( nodeStartFind(file, start), nodeEndFind(file, start) ); + contEnd = end; + initialised = true; + return; + } + if( file.compare(start + 1, 1, "!") == 0 || file.compare(start + 1, 1, "?") == 0 ) { + faux = true; + contSt = start; + contEnd = file.find(">", start + 1); + end = std::min( nodeStartFind(file, contEnd), nodeEndFind(file, contEnd) ); + initialised = true; + return; + } + auto stEnd = file.find(">", start); + if( file.compare(stEnd - 1, 1, "/" ) == 0 ) { + end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); + contSt = npos; + contEnd = npos; + initialised = true; + return; + } + contSt = stEnd + 1; + auto stPos = nodeStartFind(file, start + 1); + stEnd = nodeEndFind(file, start + 1); + contEnd = std::min(stPos, stEnd); + while( stPos < stEnd ) + { + children->push_back( std::make_shared( file, stPos, stEnd ) ); + } + stEnd = endNodeEndFind(file, stEnd); + end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); + initialised = true; + } + xmlTree( std::string_view file, size_t& strt, size_t& nd ){ + origin = file; + children = std::make_shared>>(); + start = file.find_first_not_of(" \n\r\f\t\v", strt); + if( file.compare(start, 1, "<") != 0) { + faux = true; + contSt = start; + strt = nodeStartFind(file, start); + nd = nodeEndFind(file, start); + end = std::min( strt, nd ); + contEnd = end; + initialised = true; + return; + } + if( file.compare(start + 1, 1, "!") == 0 ) { + faux = true; + contSt = start; + contEnd = file.find(">", start + 1); + strt = nodeStartFind(file, contEnd); + nd = 
nodeEndFind(file, contEnd); + end = std::min( strt, nd ); + initialised = true; + return; + } + auto stEnd = file.find(">", start); + if( file.compare(stEnd - 1, 1, "/" ) == 0 ) { + end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); + contSt = npos; + contEnd = npos; + strt = nodeStartFind(file, start); + nd = nodeEndFind(file, start); + initialised = true; + return; + } + contSt = stEnd + 1; + strt = nodeStartFind(file, start + 1); + nd = nodeEndFind(file, start + 1); + contEnd = std::min(strt, nd); + while( strt < nd ) + { + children->push_back( std::make_shared( file, strt, nd ) ); + } + end = file.find_first_not_of(" \n\r\f\t\v", endNodeEndFind(file, nd) + 1); + initialised = true; + strt = end; + nd = nodeEndFind(file, strt); + } + auto& getChildren(){ return children; } + std::string_view& getOrigin(){ return origin; } + size_t getStart(){ return start; } + size_t getEnd(){ return end; } + size_t getContStart(){ return contSt; } + size_t getContEnd(){ return contEnd; } + bool isFaux(){ return faux; } + bool isInit(){ return initialised; } + bool hasChildren(){ return children->size() > 0; } + protected: + std::shared_ptr>> children; // vector of pointers to children nodes + std::string_view origin; + size_t start; // position of opening bracket of node opening + size_t end; // position of final character of ending node, including trailing blankspace + size_t contSt; + size_t contEnd; + bool faux = false; // boolean showing whether this item is a true node or content squeezed between nodes + bool initialised; + }; + // ZW: struct for handling nodes in generic XML files struct xmlNode { public: xmlNode(){ modded = false; return; } xmlNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ){ - modded = false; xmlFile = originFile; start = begin; children = childs; - if( xmlFile.substr(start, 1) != "<" ){ start = *nodeStartFind( xmlFile, size_t(start) ); } - size_t trueStart = xmlFile.find_first_not_of(" ", 
start+1); - name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ ", trueStart) - trueStart ); - if( xmlFile.find( ">", trueStart ) < xmlFile.find( "/", trueStart ) ){ - content = xmlFile.substr( xmlFile.find( ">", trueStart ) + 1, xmlFile.find( "", trueStart ) - 1 ); - } + modded = false; + xmlFile = originFile; + structure = xmlTree( originFile ); + faux = structure.isFaux(); + start = structure.getStart(); + end = structure.getEnd(); + size_t trueStart = xmlFile.find_first_not_of("< \n\r\f\t\v", start+1); + name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ \n\r\f\t\v", trueStart) - trueStart ); + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + for( auto& child : *(structure.getChildren()) ){ + children.push_back( std::make_shared( *child ) ); + } + } + xmlNode( xmlTree &tree ){ + modded = false; + structure = tree; + if( !structure.isInit() ){ return; } + xmlFile = structure.getOrigin(); + faux = structure.isFaux(); + start = structure.getStart(); + end = structure.getEnd(); + size_t trueStart = xmlFile.find_first_not_of("< \n\r\f\t\v", start); + name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ \n\r\f\t\v", trueStart) - trueStart ); + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + for( auto& child : *(structure.getChildren()) ){ + children.push_back( std::make_shared( *child ) ); + } } std::vector> getChildren(){ return children; } std::vector> getTags(){ return tags; } @@ -311,6 +535,7 @@ namespace REX std::string_view getContent(){ return content; } size_t getStart(){ return start; } size_t getEnd(){ return end; } + xmlTree getTree(){ return structure; } virtual bool isModded(){ return modded; } virtual bool isModded( bool deep ){ bool modStat = isModded(); @@ -320,6 +545,8 @@ namespace REX } bool isWritten(){ return written; } bool isParsed(){ return parsed; } + bool isFaux(){ return faux; } + bool hasChildren(){ 
return children.size() > 0; } void setModded( bool mod ){ modded = mod; } bool deepModded(){ return deepMod; } bool deepParse(){ return deepParsed; } @@ -347,32 +574,16 @@ namespace REX } bool parseTop(){ if( xmlFile == "" ){ return false; } + if( isFaux() ){ return true; } size_t eqSgn = xmlFile.find( "=", start ); size_t nodeInitEnd = xmlFile.find( ">", start ); while( eqSgn < nodeInitEnd ){ tags.push_back( xmlTagParser( xmlFile, eqSgn ) ); } return true; } virtual bool parseContent(){ if( xmlFile == "" ){ return false; } - auto firstR = xmlFile.find_first_of( ">/", start ); - auto nodeStrEnd = xmlFile.find(">", firstR); - if( firstR < nodeStrEnd ){ content = ""; end = nodeStrEnd + 2; parsed = true; return true; } - auto endNode = *nodeEndFind( xmlFile, start ); - auto startNode = *nodeStartFind( xmlFile, start + 1 ); - if( startNode > endNode ){end = xmlFile.find( ">", endNode ) + 1; content = xmlFile.substr( xmlFile.find( ">", start ) + 1, endNode - xmlFile.find( ">", start ) - 1 ); return true; } - auto endPt = xmlFile.find( std::string("", start) + 1, startNode - xmlFile.find(">") - 1 ); - end = xmlFile.find( ">", endPt ) + 2; - while( startNode < endNode ){ - auto nextNode = std::make_shared( xmlFile, startNode ); - children.push_back( nextNode ); - int starts = 0; - while( startNode < endNode ) - { - startNode = *nodeStartFind( xmlFile, startNode + 1 ); - ++starts; - } - for( int k = 0 ; k < starts ; ++k ){ endNode = *nodeEndFind( xmlFile, endNode + 1 ); } - if( endNode > end ){ break; } + end = structure.getContEnd(); + for( auto branch : *(structure.getChildren()) ){ + children.push_back( std::make_shared( *branch ) ); } return true; } @@ -393,10 +604,14 @@ namespace REX } return status; } - std::shared_ptr writtenSelf; - bool deepMod = false; + std::string nodeHeader; + std::string nodeContent; + std::string nodeEnd; + xmlTree structure; std::vector> children; std::vector> tags; + std::shared_ptr writtenSelf; + bool deepMod = false; std::string_view 
xmlFile; std::string_view name; std::string_view content; @@ -406,10 +621,9 @@ namespace REX bool written = false; bool parsed = false; bool deepParsed = false; - std::string nodeHeader; - std::string nodeContent; - std::string nodeEnd; + bool faux = false; virtual void headWriter() { + if( isFaux() ){ return; } nodeHeader = "<" + std::string(name) ; for( auto tag : tags ){ nodeHeader += " " + std::string(tag->getId()) + "=\"" + std::string(tag->getVal()) + "\""; @@ -417,10 +631,12 @@ namespace REX nodeHeader += ">"; } virtual void endWriter() { - nodeEnd = "\n"; + if( isFaux() ){ return; } + auto endSt = xmlFile.find_last_of("<", end); + nodeEnd = xmlFile.substr( endSt, end - endSt ); } virtual void contWriter() { - if( children.size() > 0 ){ + if( hasChildren() ){ nodeContent = std::string(content.substr(0, children[0]->start - 1 )); } else { nodeContent = std::string(content); @@ -449,8 +665,6 @@ namespace REX written = true; modded = false; } else if( !isWritten() ){ - endFinder(); - if( start > xmlFile.size() ){ start = 0; } writtenSelf = std::make_shared( xmlFile.substr( start, end - start ) ); written = true; } @@ -461,10 +675,15 @@ namespace REX for( auto child : children ) { child->childCounter( noChilds ); - if( child->end == 0 ){ --noChilds; } + if( child->end == 0 || child->isFaux() ){ --noChilds; } } noChilds += children.size(); - } + } + virtual int childCounter() { + int noChilds = 0; + childCounter( noChilds ); + return noChilds; + } virtual std::shared_ptr nodeWriter() { if( isModded( true ) || !isWritten() ){ fullWriter(); } return writtenSelf; @@ -480,7 +699,7 @@ namespace REX auto currNode = std::make_shared(parseFile, initPos); size_t equalSign = parseFile.find("=", initPos); size_t nodeInitEnd = parseFile.find(">", initPos); - initPos = *nodeStartFind( parseFile, initPos + 1 ); + initPos = nodeStartFind( parseFile, initPos + 1 ); while( equalSign < nodeInitEnd ){ currNode->addTag( xmlTagParser(parseFile, equalSign) ); } @@ -489,14 +708,14 @@ 
namespace REX currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); } - initPos = *nodeStartFind( parseFile, endPos ); - endPos = *nodeEndFind( parseFile, endPos + 1 ); + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, endPos + 1 ); return currNode; } // ZW: struct for handling rwgt parameter sets // in the LHE header initrwgt node - struct headWeight : xmlNode { + struct headWeight : public xmlNode { public: int getId(){ return id; } std::string_view getTag(){ return idTag; } @@ -536,6 +755,36 @@ namespace REX } } } + headWeight( xmlTree& tree ) : xmlNode( tree ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } + headWeight( xmlTree* tree ) : xmlNode( *tree ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } + headWeight( std::shared_ptr tree ) : xmlNode( *tree ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } headWeight( std::string_view paramSet, std::string& idText, unsigned int idNo, const size_t& begin = 0 ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; } @@ -551,6 +800,7 @@ namespace REX if( idTag == "" ){ nodeHeader = ""; return; } if( id == npos ){ nodeHeader = ""; return; } nodeHeader = ""; + return; } nodeHeader = "( nodeHeader + nodeContent + nodeEnd ); - writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); written = true; modded = 
false; } @@ -611,7 +860,7 @@ namespace REX // ZW: struct for handling rwgt groups // in the LHE header initrwgt node - struct weightGroup : xmlNode { + struct weightGroup : public xmlNode { public: bool getIncId(){ return includeId; } void setIncId( bool nuIncId ){ includeId = nuIncId; } @@ -636,6 +885,42 @@ namespace REX } for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } } + weightGroup( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } weightGroup( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) : xmlNode( originFile, begin, childs ){ name = "weightgroup"; @@ -672,6 +957,7 @@ namespace REX } void 
childWriter() override{ for(auto child : children){ + if( child->getName() == "weight" ){ continue; } nodeContent += (*child->nodeWriter()); } } @@ -682,7 +968,7 @@ namespace REX void endWriter() override{ nodeEnd = "\n"; } }; - struct initRwgt : xmlNode { + struct initRwgt : public xmlNode { public: std::vector> getGroups(){ return groups; } size_t noGrps(){ return groups.size(); } @@ -718,6 +1004,14 @@ namespace REX groups.push_back( std::make_shared( *child ) ); } } + initRwgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + name = "initrwgt"; + groups.reserve( children.size() ); + for( auto child : children ){ + groups.push_back( std::make_shared( *child ) ); + } + } initRwgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ parser( true ); name = "initrwgt"; @@ -726,6 +1020,14 @@ namespace REX groups.push_back( std::make_shared( *child ) ); } } + initRwgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + parser( true ); + name = "initrwgt"; + groups.reserve( children.size() ); + for( auto child : children ){ + groups.push_back( std::make_shared( *child ) ); + } + } protected: bool grpIsInit = false; bool grpInit( std::shared_ptr& wgt ){ @@ -755,9 +1057,9 @@ namespace REX } }; - // ZW: struct for handling event + // ZW: struct for handling weights // in event blocks of LHE files - struct bodyWgt : xmlNode { + struct bodyWgt : public xmlNode { public: void setComment( std::string_view nuComment ){ modded = true; comment = nuComment; } void setVal( std::string nuVal ){ modded = true; valS = nuVal; valD = std::stod(valS);} @@ -787,6 +1089,36 @@ namespace REX valS = originFile.substr( strtPt, originFile.find(" ", strtPt) - strtPt ); valD = std::stod( valS ); } + bodyWgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + valS = xmlFile.substr( 
structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } bodyWgt( double value, std::string& idTag ){ setVal( value ); id = idTag; @@ -893,7 +1225,7 @@ namespace REX evHead( const std::string_view originFile, size_t beginLine = 0, size_t endLine = npos ) { if( originFile.size() == 0){ return; } - beginLine = originFile.find_first_not_of("\n ", beginLine); + beginLine = originFile.find_first_not_of("\n \r\f\t\v", beginLine); if( endLine == npos ){ endLine = originFile.find("\n", beginLine ) + 1; } sourceFile = originFile.substr( beginLine, endLine - beginLine ); auto evLine = nuWordSplitter( sourceFile ); @@ -921,7 +1253,7 @@ namespace REX if( !isModded() ){ content = std::make_shared( sourceFile ); return; } auto retText = std::make_shared( " " ); *content = " " + std::string( nprt ); - for( int k = 0 ; k < 8 - procid.length() ; ++k ){ *content += " "; } + for( size_t k = 0 ; k < 8 - procid.length() ; ++k ){ *content += " "; } *content += std::string( procid ) + " " + std::string( weight ) + " " + std::string( scale ) + " " + std::string( aqed ) + " " + std::string( aqcd ); if( comment 
!= "" ){ *content += " # " + std::string( comment ); } *content += "\n"; @@ -962,6 +1294,14 @@ namespace REX return content; } lhePrt(){ return; } + lhePrt( std::pair prtInfo ){ + status = std::to_string( prtInfo.first ); + pdg = std::to_string( prtInfo.second ); + } + lhePrt( std::pair& prtInfo ){ + status = std::to_string( prtInfo.first ); + pdg = std::to_string( prtInfo.second ); + } lhePrt( const std::string_view originFile, const size_t& beginLine = 0, const size_t& endLine = npos ) { sourceFile = originFile.substr( beginLine, endLine - beginLine ); @@ -998,7 +1338,7 @@ namespace REX if( isWritten() && !isModded() ){ return; } if( !isModded() ){ content = std::make_shared( sourceFile ); return; } *content = ""; - for( int k = 0; k < 10 - pdg.length() ; ++k ){ *content += " "; } + for( size_t k = 0; k < 10 - pdg.length() ; ++k ){ *content += " "; } *content += std::string(pdg) + " " + std::string(status); for( auto mum : mothers ){ *content += " " + std::string( mum ); } for( auto col : icol ){ *content += " " + std::string( col ); } @@ -1012,7 +1352,7 @@ namespace REX }; // ZW: struct for handling LHE format event block - struct event : xmlNode { + struct event : public xmlNode { public: evHead getHead(){ return header; } std::vector> getPrts(){ return prts; } @@ -1038,9 +1378,19 @@ namespace REX return modStat; } event(){ return; } + event( std::vector> prtInfo ){ + header.setNprt( std::to_string( prtInfo.size() ) ); + for( auto prt : prtInfo ){ + prts.push_back( std::make_shared( prt ) ); + } + } + event( std::vector> prtInfo ){ + header.setNprt( std::to_string( prtInfo.size() ) ); + prts = prtInfo; + } event( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) : xmlNode(originFile, begin, childs) { - xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); + xmlFile = originFile; start = begin; children = childs; size_t trueStart = 
originFile.find_first_not_of(" \n\r\f\t\v", begin+1); if( trueStart == npos ){ return; } auto vals = lineFinder( originFile.substr( trueStart, originFile.find("<", trueStart + 3 ) - trueStart + 3 )); header = evHead(originFile, vals->at(0) + trueStart, vals->at(1) + trueStart + 1 ); @@ -1052,7 +1402,62 @@ namespace REX } event( const xmlNode& originFile ) : xmlNode( originFile ) { - size_t trueStart = xmlFile.find_first_not_of(" ", start+1); + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", start+1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event( const xmlNode* originFile ) + : xmlNode( *originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event( const std::shared_ptr& originFile ) + : xmlNode( *originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, 
vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event( xmlTree& originFile ) + : xmlNode( originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event( xmlTree* originFile ) + : xmlNode( *originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event( std::shared_ptr originFile ) + : xmlNode( *originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); prts.reserve(vals->size()); @@ -1068,6 +1473,9 @@ namespace REX bool headIsMod(){ return header.isModded(); } + bool isSpecSort() const { return specSorted; } + sortFcn getSortFcn() const { return eventSort; } + statSort getStatSort() const { return specSort; } protected: std::vector> rwgt; std::shared_ptr childRwgt; @@ -1083,7 +1491,65 @@ namespace REX bool bothRwgt(){ return (hasRwgt() && rwgtChild() ); } bool eitherRwgt(){ return (hasRwgt() || rwgtChild() ); } evHead 
header; + bool hasBeenProc = false; std::vector> prts; + std::map> procMap; + std::map> procOrder; + sortFcn eventSort = []( std::vector vec ){ return stodSort( vec ); }; + statSort specSort = []( std::string_view stat, std::vector vec ){ return stodSort( vec ); }; + bool specSorted = false; + bool initProcMap(bool hard = false) + { + if(!hard){ if( procMap.size() > 0 ){ return true; } } + for( auto prt : prts ){ + procMap.insert({prt->getStatus(), std::vector()}); + procOrder.insert({prt->getStatus(), std::vector()}); + } + for( auto prt : prts ){ + procMap[prt->getStatus()].push_back( prt->getPDG() ); + } + for( auto stat = procMap.begin(); stat!= procMap.end(); ++stat ){ + procOrder[stat->first] = *stoiSort( stat->second ); + } + hasBeenProc = true; + return true; + } + bool initProcMap( sortFcn sorter, bool hard = false ) + { + if(!hard){ if( procMap.size() > 0 ){ return true; } } + specSorted = false; + eventSort = sorter; + for( auto prt : prts ){ + procMap.insert({prt->getStatus(), std::vector()}); + procOrder.insert({prt->getStatus(), std::vector()}); + } + for( auto prt : prts ){ + procMap[prt->getStatus()].push_back( prt->getPDG() ); + } + for( auto stat = procMap.begin(); stat!= procMap.end(); ++stat ){ + procOrder[stat->first] = *sorter( stat->second ); + } + hasBeenProc = true; + return true; + } + bool initProcMap( statSort sorter, bool hard = false ) + { + if(!hard){ if( procMap.size() > 0 ){ return true; } } + specSorted = true; + specSort = sorter; + for( auto prt : prts ){ + procMap.insert({prt->getStatus(), std::vector()}); + procOrder.insert({prt->getStatus(), std::vector()}); + } + for( auto prt : prts ){ + procMap[prt->getStatus()].push_back( prt->getPDG() ); + } + for( auto stat = procMap.begin(); stat!= procMap.end(); ++stat ){ + procOrder[stat->first] = *sorter(stat->first, stat->second ); + } + hasBeenProc = true; + return true; + } bool inRwgtChild( std::string_view name ){ for( auto child : childRwgt->getChildren() ){ for( auto tag : 
child->getTags() ){ if(clStringComp(tag->getVal(), name)){ return true; } } @@ -1179,6 +1645,38 @@ namespace REX if( addedWgt ){ appendWgts(); } return writtenSelf; } + auto &getProc(){ + if( initProcMap() ){ return procMap; } + else throw std::runtime_error("Error while parsing event node."); + } + auto &getProcOrder(){ + if( initProcMap() ){ return procOrder; } + else throw std::runtime_error("Error while parsing event node."); + } + auto &getProc() const{ + if ( hasBeenProc ){ return procMap; } + else throw std::runtime_error("Const declaration of event node before it has been procesed."); + } + auto &getProcOrder() const{ + if ( hasBeenProc ){ return procOrder; } + else throw std::runtime_error("Const declaration of event node before it has been procesed."); + } + auto &getProc(sortFcn sorter){ + if( initProcMap(sorter) ){ return procMap; } + else throw std::runtime_error("Error while parsing event node."); + } + auto &getProcOrder(sortFcn sorter){ + if( initProcMap(sorter) ){ return procOrder; } + else throw std::runtime_error("Error while parsing event node."); + } + auto &getProc(statSort sorter){ + if( initProcMap(sorter) ){ return procMap; } + else throw std::runtime_error("Error while parsing event node."); + } + auto &getProcOrder(statSort sorter){ + if( initProcMap(sorter) ){ return procOrder; } + else throw std::runtime_error("Error while parsing event node."); + } }; // ZW: struct for handling the first line of @@ -1322,7 +1820,7 @@ namespace REX // ZW: struct for handling single DECAY line // in SLHA format parameter card - struct decVal : paramVal{ + struct decVal : public paramVal{ public: void parse() override { auto vals = *nuBlankSplitter( realLine ); @@ -1411,7 +1909,7 @@ namespace REX // ZW: struct for handling DECAY lines // in SLHA format parameter card - struct decBlock : paramBlock { + struct decBlock : public paramBlock { public: std::vector decays; void parse( bool parseOnline = false ) override{ @@ -1479,7 +1977,7 @@ namespace REX auto 
blockPts = clFindEach( xmlFile, std::string("\nblock") ); auto decLines = clFindEach( xmlFile, std::string("\ndecay") ); header = xmlFile.substr( start, std::min( blockPts->at(0), decLines->at(0) ) - start ); - for( int k = 0 ; k < blockPts->size() - 1 ; ++k ) + for( size_t k = 0 ; k < blockPts->size() - 1 ; ++k ) { blocks.push_back( paramBlock( xmlFile.substr( blockPts->at(k), blockPts->at(k+1) - blockPts->at(k) ), parseOnline ) ); } @@ -1490,7 +1988,7 @@ namespace REX parsed = true; } lesHouchesCard( const std::string_view originFile = "", const size_t& begin = 0, bool parseOnline = false ){ - xmlFile = originFile; start = begin; size_t trueStart = originFile.find_first_not_of("\n ", begin+1); + xmlFile = originFile; start = begin; modded = false; blockStart = clStringFindIf( xmlFile, std::string("\n"), lambda, start + 1); end = xmlFile.find(" getParameters(){ modded = true; @@ -1534,6 +2032,15 @@ namespace REX slhaNode( std::shared_ptr node, bool parseOnline = false ) : xmlNode( *node ){ parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); } + slhaNode( xmlTree tree, bool parseOnline = false ) : xmlNode( tree ){ + parameterCard = std::make_shared( tree.getOrigin(), tree.getStart(), parseOnline ); + } + slhaNode( std::shared_ptr tree, bool parseOnline = false ) : xmlNode( *tree ){ + parameterCard = std::make_shared( tree->getOrigin(), tree->getStart(), parseOnline ); + } + slhaNode( xmlTree* tree, bool parseOnline = false ) : xmlNode( *tree ){ + parameterCard = std::make_shared( tree->getOrigin(), tree->getStart(), parseOnline ); + } slhaNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ) : xmlNode( originFile, begin ){ if( parse() ){ parameterCard = std::make_shared( content, begin, parseOnline ); pCardInit = true; } @@ -1559,7 +2066,7 @@ namespace REX }; // ZW: struct for handling LHE init nodes - struct initNode : xmlNode { + struct initNode : public xmlNode { public: std::shared_ptr 
getHead(){ return initHead; } std::vector> getLines(){ return initLines; } @@ -1569,8 +2076,31 @@ namespace REX initNode() : xmlNode(){ name = "init"; } initNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ) : xmlNode( originFile, begin ){ - auto strtPt = originFile.find_first_not_of(" \n", originFile.find(">", start+1)); - content = originFile.substr( strtPt, originFile.find(" node, bool parseOnline = false ) : xmlNode( *node ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } + } + initNode( xmlTree tree, bool parseOnline = false ) : xmlNode( tree ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } + } + initNode( std::shared_ptr tree, bool parseOnline = false ) : xmlNode( *tree ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } + } + initNode( xmlTree* tree, bool parseOnline = false ) : xmlNode( *tree ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } } protected: std::shared_ptr initHead; @@ -1580,7 +2110,7 @@ namespace REX auto linebreaks = lineFinder( content ); if( linebreaks->size() == 0 ){ return false; } initHead = std::make_shared(content.substr( 0, linebreaks->at(0) ) ); - for( int k = 0 ; k < linebreaks->size() - 1 ; ++k ){ + for( size_t k = 0 ; k < linebreaks->size() - 1 ; ++k ){ initLines.push_back( std::make_shared( content.substr( linebreaks->at(k), linebreaks->at(k+1) - linebreaks->at(k) ) ) ); } return true; @@ -1595,7 +2125,7 @@ namespace REX }; // ZW: struct for explicitly handling LHE header nodes - struct lheHead : xmlNode { + struct lheHead : public xmlNode { public: size_t addWgtGroup( 
std::shared_ptr& wgtGroup ){ hasRwgt = true; @@ -1614,30 +2144,30 @@ namespace REX } return (rwgtNodes->noGrps() - 1); } - void addWgt( unsigned int index, std::shared_ptr nuWgt ){ - if( index >= (int)rwgtNodes->getGroups().size() ) + void addWgt( size_t index, std::shared_ptr nuWgt ){ + if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." ); hasRwgt = true; modded = true; rwgtNodes->addWgt( index, nuWgt ); } - void addWgt( unsigned int index, headWeight nuWgt ){ - if( index >= (int)rwgtNodes->getGroups().size() ) + void addWgt( size_t index, headWeight nuWgt ){ + if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." ); hasRwgt = true; modded = true; rwgtNodes->addWgt( index, nuWgt ); } - void addWgt( unsigned int index, std::shared_ptr nuWgt, std::string idTagg ){ - if( index >= (int)rwgtNodes->getGroups().size() ) + void addWgt( size_t index, std::shared_ptr nuWgt, std::string idTagg ){ + if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." ); hasRwgt = true; modded = true; nuWgt->setId( idTagg ); rwgtNodes->addWgt( index, nuWgt ); } - void addWgt( unsigned int index, headWeight nuWgt, std::string idTagg ){ - if( index >= (int)rwgtNodes->getGroups().size() ) + void addWgt( size_t index, headWeight nuWgt, std::string idTagg ){ + if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." 
); hasRwgt = true; modded = true; @@ -1656,6 +2186,46 @@ namespace REX : xmlNode(originFile, begin, childs){ xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} + for( auto child : children ){ + if (child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if (child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead( xmlNode& node ) : xmlNode(node){ + for( auto child : node.getChildren() ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead( xmlNode* node ) : xmlNode(*node){ + for( auto child : node->getChildren() ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead( std::shared_ptr node ) : xmlNode( *node ){ + for( auto child : node->getChildren() ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead( xmlTree tree ) : xmlNode( tree ){ + for( auto child : children ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead( std::shared_ptr tree ) : xmlNode( *tree ){ + for( auto child : children ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead( xmlTree* tree ) : xmlNode( 
*tree ){ + for( auto child : children ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } } protected: bool wgtGrpIsInit = false; @@ -1677,7 +2247,7 @@ namespace REX void setRelChild(){ if( relChildSet ){ return; } relChild.reserve( children.size() ); - for( int k = 0 ; k < children.size() ; ++k ){ + for( size_t k = 0 ; k < children.size() ; ++k ){ auto child = &children[k]; if( (*child)->getName() == "slha" ){ continue; } if( (*child)->getName() == "initrwgt" ){ continue; } @@ -1787,17 +2357,22 @@ namespace REX }; // ZW: general struct for handling LHE files explicitly - struct lheNode : xmlNode { + struct lheNode : public xmlNode { public: - std::vector> events = {}; - std::shared_ptr header = std::make_shared(xmlFile, start); - std::shared_ptr init = std::make_shared(xmlFile, start); lheNode() : xmlNode(){} lheNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) : xmlNode(originFile, begin, childs){ - xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); - if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} + //xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); + //if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} + for( auto child : children ){ + if( child->getName() == "header" ){ header = std::make_shared( *child ); continue; } + if( child->getName() == "init" ){ init = std::make_shared( *child ); continue; } + if( child->getName() == "event" ){ events.push_back( std::make_shared( *child ) ); continue; } + } } + auto getHeader(){ return header; } + auto getInit(){ return init; } + auto& getEvents(){ return 
events; } bool isModded() override{ return modded; } bool isModded( bool deep ) override{ if( !deep ){ return isModded(); } @@ -1806,21 +2381,47 @@ namespace REX for( auto event : events ){ modStat = ( modStat || event->isModded( deep ) ); } return modStat; } + void setInit( std::shared_ptr initNod ){ init = initNod; } + void setHeader( std::shared_ptr headNod ){ header = headNod; } void addWgt( size_t index, newWgt& addedWgt ){ header->addWgt( index, addedWgt.getHeadWgt() ); auto wgtsVec = addedWgt.getBodyWgts(); - for( int k = 0 ; k < wgtsVec.size() ; ++k ){ + for( size_t k = 0 ; k < wgtsVec.size() ; ++k ){ events[k]->addWgt( wgtsVec[k] ); } } void addWgt( size_t index, newWgt& addedWgt, std::string& idTag ){ header->addWgt( index, addedWgt.getHeadWgt(), idTag ); auto wgtsVec = addedWgt.getBodyWgts(); - for( int k = 0 ; k < wgtsVec.size() ; ++k ){ + for( size_t k = 0 ; k < wgtsVec.size() ; ++k ){ events[k]->addWgt( wgtsVec[k] ); } } + void setRelStats( std::vector& particles ){ + relStat = particles; + } + std::vector& getRelStats(){ + return relStat; + } + void setSameSort( sortFcn& sortF ){ + particleSort = sortF; + } + sortFcn& getSameSort(){ + return particleSort; + } + void setStatSort( statSort& statS ){ + statParticleSort = statS; + } + statSort& getStatSort(){ + return statParticleSort; + } protected: + std::vector> events = {}; + std::shared_ptr header = std::make_shared(xmlFile, start); + std::shared_ptr init = std::make_shared(xmlFile, start); + std::vector relStat = {"-1", "1"}; + sortFcn particleSort = []( std::vector prts ){ return stodSort(prts); }; + statSort statParticleSort = []( std::string_view dummy, std::vector prts ){ return stodSort(prts); }; virtual void headerWriter(){ nodeContent += "\n" + *header->nodeWriter(); } @@ -1860,17 +2461,18 @@ namespace REX // ZW: function for extracting event information from // LHE files - std::vector>> valExtraction( const lheNode& lheFile ) + std::vector>> valExtraction( lheNode& lheFile ) { bool getGs = 
true; auto momVec = std::make_shared>(); auto wgtVec = std::make_shared>(); auto gVec = std::make_shared>(); - momVec->reserve( lheFile.events.size() * 4 * std::stoi(std::string(lheFile.events[0]->getHead().getNprt())) ); - wgtVec->reserve( lheFile.events.size() ); - gVec->reserve( lheFile.events.size() ); + auto events = lheFile.getEvents(); + momVec->reserve( events.size() * 4 * std::stoi(std::string(events[0]->getHead().getNprt())) ); + wgtVec->reserve( events.size() ); + gVec->reserve( events.size() ); if( getGs ){ - for( auto event : lheFile.events ) + for( auto event : events ) { wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); gVec->push_back( std::sqrt( 4.0 * M_PI * std::stod(std::string( event->getHead().getAQCD() )))); @@ -1882,7 +2484,7 @@ namespace REX } } } else{ - for( auto event : lheFile.events ) + for( auto event : events ) { wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); gVec->push_back( std::stod(std::string( event->getHead().getAQCD() ))); @@ -1902,7 +2504,7 @@ namespace REX std::shared_ptr evPtrParsor( std::string_view parseFile, size_t& initPos, size_t& endPos ) { auto currNode = std::make_shared(parseFile, initPos); - initPos = *nodeStartFind( parseFile, initPos + 1 ); + initPos = nodeStartFind( parseFile, initPos + 1 ); while( initPos < endPos ) { currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); @@ -1912,8 +2514,8 @@ namespace REX while( equalSign < nodeInitEnd ){ currNode->addTag( xmlTagParser(parseFile, equalSign) ); } - initPos = *nodeStartFind( parseFile, endPos ); - endPos = *nodeEndFind( parseFile, endPos + 1 ); + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, endPos + 1 ); return currNode; } @@ -1922,14 +2524,12 @@ namespace REX std::shared_ptr lheHeadParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) { auto currNode = std::make_shared(parseFile, initPos); - initPos = *nodeStartFind( parseFile, initPos + 1 ); + 
initPos = nodeStartFind( parseFile, initPos + 1 ); while( initPos < endPos ) { - auto nuStrtPos = *nodeStartFind( parseFile, initPos); currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "init" ){ continue; } if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "slha" ){ - auto nuLine = parseFile.find("\n", parseFile.find("<", initPos)); currNode->setParameters( std::make_shared(currNode->getChildren()[ currNode->getChildren().size() - 1 ]) ); } if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "initrwgt" ){ @@ -1941,8 +2541,8 @@ namespace REX while( equalSign < nodeInitEnd ){ currNode->addTag( xmlTagParser(parseFile, equalSign) ); } - initPos = *nodeStartFind( parseFile, endPos ); - endPos = *nodeEndFind( parseFile, endPos + 1 ); + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, endPos + 1 ); return currNode; } @@ -1951,32 +2551,19 @@ namespace REX std::shared_ptr lheParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) { auto currNode = std::make_shared(parseFile, initPos); - initPos = *nodeStartFind( parseFile, initPos + 1 ); + initPos = nodeStartFind( parseFile, initPos + 1 ); while( initPos < endPos ) { - auto nuStrtPos = *nodeStartFind( parseFile, initPos); - //if( nuStrtPos == parseFile.find("events.push_back( evPtrParsor( parseFile, initPos, endPos ) ); - // continue; - //} else if( nuStrtPos == parseFile.find("header = lheHeadParser( parseFile, initPos, endPos ); - // continue; - //} else if( nuStrtPos == parseFile.find("init = std::make_shared( parseFile, initPos ); - // initPos = *nodeStartFind( parseFile, endPos ); - // endPos = *nodeEndFind( parseFile, *nodeEndFind( parseFile, endPos + 1 ) + 1); - // continue; - //} if( parseFile.substr( initPos, 6 ) == "events.push_back( evPtrParsor( parseFile, initPos, endPos ) ); + currNode->getEvents().push_back( 
evPtrParsor( parseFile, initPos, endPos ) ); continue; } else if( parseFile.substr( initPos, 7 ) == "header = lheHeadParser( parseFile, initPos, endPos ); + currNode->setHeader(lheHeadParser( parseFile, initPos, endPos )); continue; } else if( parseFile.substr( initPos, 5 ) == "init = std::make_shared( parseFile, initPos ); - initPos = *nodeStartFind( parseFile, endPos ); - endPos = *nodeEndFind( parseFile, *nodeEndFind( parseFile, endPos + 1 ) + 1); + currNode->setInit( std::make_shared( parseFile, initPos ) ); + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, nodeEndFind( parseFile, endPos + 1 ) + 1); continue; } else { currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); @@ -1987,8 +2574,8 @@ namespace REX while( equalSign < nodeInitEnd ){ currNode->addTag( xmlTagParser(parseFile, equalSign) ); } - initPos = *nodeStartFind( parseFile, endPos ); - endPos = *nodeEndFind( parseFile, endPos + 1 ); + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, endPos + 1 ); return currNode; } @@ -2003,18 +2590,48 @@ namespace REX std::vector plusTwo; std::vector plusThree; std::vector minusNine; + std::vector orderMOne; + std::vector orderOne; + std::vector orderMTwo; + std::vector orderTwo; + std::vector orderThree; + std::vector orderNine; std::map> valVecs{{"-1", minusOne}, {"1", plusOne}, {"-2", minusTwo}, {"2", plusTwo}, {"3", plusThree}, {"-9", minusNine}}; + std::map> orderVecs{{"-1", orderMOne}, {"1", orderOne}, {"-2", orderMTwo}, {"2", orderTwo}, {"3", orderThree}, {"9",orderNine}}; lheProc( event& eventNode ) { for( auto prt : eventNode.getPrts() ) { valVecs[prt->getStatus()].push_back(prt->getPDG()); } + for( auto valVec = valVecs.begin() ; valVec!= valVecs.end() ; ++valVec ){ + if( valVec->second.size() == 0 ){ continue; } + orderVecs[valVec->first] = *stoiSort( valVec->second ); + } + } + std::shared_ptr writer(){ + auto written = std::make_shared(); + for( auto inits : valVecs["-1"] ){ + 
written->append(inits); + written->append(" "); + } + if( valVecs["2"].size() > 0 ){ + written->append("> "); + for( auto inits : valVecs["2"] ){ + written->append(inits); + written->append(" "); + } + } + written->append("> "); + for( auto inits : valVecs["1"] ){ + written->append(inits); + written->append(" "); + } + return written; } }; - // ZW: fcn for uploading text files - // to the program, pushing all characters to lowercase + // ZW: fcn for uploading text files to the program std::shared_ptr filePuller( const std::string& fileLoc ) { std::ifstream fileLoad( fileLoc ); @@ -2037,9 +2654,9 @@ namespace REX return true; } - // ZW: fcn for extracting the fill + // ZW: fcn for extracting the full // process information from an LHE event - std::shared_ptr>> pgdXtract( event& currEv, const std::vector& pdgVec ) + std::shared_ptr>> pdgXtract( event& currEv ) { auto currProc = std::make_shared>>(); auto &useProc = *currProc; @@ -2049,52 +2666,193 @@ namespace REX } return currProc; } + + template + bool chaoticVecComp( const std::vector& vec1, const std::vector order1, const std::vector& vec2, const std::vector order2 ) + { + if( vec1.size()!= vec2.size() ){ return false; } + for( size_t i = 0; i < vec1.size(); i++ ){ + if( vec1[order1[i]]!= vec2[order2[i]] ){ return false; } + } + return true; + } - // ZW: fcn for comparing two processes it the - // format output by pgdXtract + // ZW: fcn for comparing two processes in the + // format output by pdgXtract bool sameProcString( std::map>& firstVec, std::map>& secVec, const std::vector& pdgVec ) + std::vector>& secVec, const std::vector& statVec ) { if( firstVec.size() != secVec.size() ){return false;} - for(auto code : pdgVec ) + for(auto code : statVec ) { if( firstVec[code] != secVec[code] ){ return false; } } return true; } + bool sameProcString( std::map>& firstVec, std::map>& firstOrder, + std::map>& secVec, std::map>& secondOrder, + std::vector& statVec ) + { + if( firstVec.size() != secVec.size() ){return 
false;} + for(auto code : statVec ) + { + if( !chaoticVecComp(firstVec[code], firstOrder[code], secVec[code], secondOrder[code]) ){ return false; } + } + return true; + } + // ZW: fcn for processes in the lheProc struct format - bool procComp( const lheProc& firstProc, const lheProc& secProc, const std::vector& pdgVec ) + bool procComp( lheProc& firstProc, lheProc& secProc, std::vector statVec ) { - for( auto stat : pdgVec ) + for( auto stat : statVec ) { if( firstProc.valVecs.at(stat).size() != secProc.valVecs.at(stat).size() ){ return false; } - if( firstProc.valVecs.at(stat) != secProc.valVecs.at(stat) ){ return false; } + if( !chaoticVecComp( firstProc.valVecs[stat], firstProc.orderVecs[stat], secProc.valVecs[stat], secProc.orderVecs[stat] ) ){ return false; } + } + return true; + } + + bool evProcComp( event& firstEv, event& secEv, std::vector statVec = {"-1", "1"} ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc()[stat].size()!= secEv.getProc()[stat].size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc()[stat], firstEv.getProcOrder()[stat], + secEv.getProc()[stat], secEv.getProcOrder()[stat] ) ){ return false; } + } + return true; + } + + bool evProcComp( event& firstEv, event& secEv, std::vector statVec, + sortFcn sorter ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc(sorter)[stat].size()!= secEv.getProc(sorter)[stat].size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc(sorter)[stat], firstEv.getProcOrder(sorter)[stat], + secEv.getProc(sorter)[stat], secEv.getProcOrder(sorter)[stat] ) ){ return false; } } return true; } - // ZW: fcn for checking whether a list of pdgKtract format + bool evProcComp( event& firstEv, event& secEv, std::vector statVec, + statSort sorter ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc(sorter)[stat].size()!= secEv.getProc(sorter)[stat].size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc(sorter)[stat], firstEv.getProcOrder(sorter)[stat], + 
secEv.getProc(sorter)[stat], secEv.getProcOrder(sorter)[stat] ) ){ return false; } + } + return true; + } + + bool evProcComp( const event& firstEv, const event& secEv, std::vector statVec = {"-1", "1"} ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc().at(stat).size()!= secEv.getProc().at(stat).size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc().at(stat), firstEv.getProcOrder().at(stat), + secEv.getProc().at(stat), secEv.getProcOrder().at(stat) ) ){ return false; } + } + return true; + } + + bool evProcComp( const event& firstEv, const event& secEv, std::vector statVec, + sortFcn sorter ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc().at(stat).size()!= secEv.getProc().at(stat).size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc().at(stat), firstEv.getProcOrder().at(stat), + secEv.getProc().at(stat), secEv.getProcOrder().at(stat) ) ){ return false; } + } + return true; + } + + bool evProcComp( const event& firstEv, const event& secEv, std::vector statVec, + statSort sorter ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc().at(stat).size()!= secEv.getProc().at(stat).size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc().at(stat), firstEv.getProcOrder().at(stat), + secEv.getProc().at(stat), secEv.getProcOrder().at(stat) ) ){ return false; } + } + return true; + } + + struct eventComp{ + bool operator()( event& firstEv, event& secEv){ + if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getStatSort());} + else {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getSortFcn() );} + } + bool operator()( const event& firstEv, const event& secEv) const { + if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getStatSort());} + else {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getSortFcn() );} + } + bool operator()(event& firstEv, event& secEv, std::vector statVec){ + if( firstEv.isSpecSort() ) {return evProcComp( 
firstEv, secEv, statVec, firstEv.getStatSort());} + else {return evProcComp( firstEv, secEv, statVec, firstEv.getSortFcn() );} + } + }; + + // ZW: fcn for checking whether a list of pdgXtract format // processes sourceProcList contains a given process newProc bool procVecContains( std::vector>>>& sourceProcList, - std::map>& newProc, const std::vector& pdgVec ) - { - int noProcs = sourceProcList.size(); + std::map>& newProc, const std::vector& statVec ) + {\ for( auto proc : sourceProcList ) { - if( sameProcString( *proc, newProc, pdgVec ) ){ return true; } + if( sameProcString( *proc, newProc, statVec ) ){ return true; } } return false; } // ZW: fcn for checking whether a vector of lheProc structs // procList contains a given lheProc nuProc - bool procListComp( const std::vector>& procList, const lheProc& nuProc, const std::vector& pdgVec ) + bool procListComp( const std::vector>& procList, lheProc& nuProc, std::vector statVec ) { if( procList.size() != 0 ){ for(auto proc : procList ) { - if( procComp( *proc, nuProc, pdgVec ) ){ return true; } + if( procComp( *proc, nuProc, statVec ) ){ return true; } + } + } + return false; + } + + bool evProcListComp( std::vector>& procList, event& nuEv, std::vector statVec ) + { + if( procList.size()!= 0 ){ + for( auto ev : procList ) + { + if( evProcComp( *ev, nuEv, statVec ) ){ return true; } + } + } + return false; + } + + bool evProcListComp( std::vector>& procList, event& nuEv, std::vector statVec, + sortFcn sorter ) + { + if( procList.size()!= 0 ){ + for( auto ev : procList ) + { + if( evProcComp( *ev, nuEv, statVec, sorter ) ){ return true; } + } + } + return false; + } + + bool evProcListComp( std::vector>& procList, event& nuEv, std::vector statVec, + statSort sorter ) + { + if( procList.size()!= 0 ){ + for( auto ev : procList ) + { + if( evProcComp( *ev, nuEv, statVec, sorter ) ){ return true; } } } return false; @@ -2102,42 +2860,85 @@ namespace REX // ZW: fcn for extracting the different processes // in a given REX 
format LHE file in the pdgXtract format - std::vector>>> procExtractor( const lheNode& lheFile ) + std::vector>>> procExtractor( lheNode& lheFile ) { std::vector>>> procList; - const static std::vector pdgVec = { "-1", "1", "-2", "2", "3", "-9" }; - for( auto event : lheFile.events ) + const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + for( auto event : lheFile.getEvents() ) { - auto currProc = pgdXtract( *event, pdgVec ); - if( procVecContains( procList, *currProc, pdgVec ) ){ continue; } + auto currProc = pdgXtract( *event ); + if( procVecContains( procList, *currProc, statVec ) ){ continue; } procList.push_back(currProc); } return procList; } - // ZW: fcn for extracting the differenty processes + // ZW: fcn for extracting the different processes // in a given REX format LHE file in the lheProc format - std::vector> processPull( const lheNode& lheFile ) + std::vector> processPull( lheNode& lheFile, + std::vector statVec = { "-1", "1" } ) { - const static std::vector pdgVec = { "-1", "1", "-2", "2", "3", "-9" }; + //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; std::vector> procsList{}; - for( auto event : lheFile.events ) + for( auto event : lheFile.getEvents() ) { auto currProc = std::make_shared( *event ); - if( procListComp( procsList, *currProc, pdgVec ) ){ continue; } + if( procListComp( procsList, *currProc, statVec ) ){ continue; } procsList.push_back( currProc ); } return procsList; } + std::vector> evProcessPull( lheNode& lheFile, std::vector statVec = { "-1", "1" } ) + { + //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector> procsList{}; + for( auto currEv : lheFile.getEvents() ) + { + if( evProcListComp( procsList, *currEv, statVec ) ){ continue; } + procsList.push_back( currEv ); + } + return procsList; + } + + std::vector> evProcessPull( lheNode& lheFile, + sortFcn sorter, + std::vector statVec = { "-1", "1" }) + { + //const static std::vector statVec = { "-1", "1", 
"-2", "2", "3", "-9" }; + std::vector> procsList{}; + lheFile.setSameSort(sorter); + for( auto currEv : lheFile.getEvents() ) + { + if( evProcListComp( procsList, *currEv, statVec, sorter ) ){ continue; } + procsList.push_back( currEv ); + } + return procsList; + } + + std::vector> evProcessPull( lheNode& lheFile, + statSort sorter, + std::vector statVec = { "-1", "1" }) + { + //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector> procsList{}; + lheFile.setStatSort(sorter); + for( auto currEv : lheFile.getEvents() ) + { + if( evProcListComp( procsList, *currEv, statVec, sorter ) ){ continue; } + procsList.push_back( currEv ); + } + return procsList; + } + // ZW: fcn for keeping track of subprocess ordering // in LHE file - int procPos( const std::vector>& evtSet, lheProc& currProc, - const std::vector& pdgVec ) + size_t procPos( const std::vector>& evtSet, lheProc& currProc, + std::vector& statVec ) { - for( auto k = 0 ; k < evtSet.size() ; ++k ) + for( size_t k = 0 ; k < evtSet.size() ; ++k ) { - for( auto stat : pdgVec ) + for( auto stat : statVec ) { if( evtSet[k]->valVecs[stat] != currProc.valVecs[stat] ){ break; } } @@ -2146,48 +2947,210 @@ namespace REX return evtSet.size(); } + size_t evProcPos( const std::vector>& evtSet, event& currEv, + std::vector statVec = { "-1", "1" } ) + { + for( size_t k = 0 ; k < evtSet.size() ; ++k ) + { + if( evProcComp(*evtSet[k], currEv, statVec) ){ return k; } + } + return evtSet.size(); + } + + size_t evProcPos( const std::vector>& evtSet, event& currEv, + sortFcn sorter, std::vector statVec = {"-1", "1"} ) + { + for( size_t k = 0 ; k < evtSet.size() ; ++k ) + { + if( evProcComp(*evtSet[k], currEv, statVec, sorter) ){ return k; } + } + return evtSet.size(); + } + + size_t evProcPos( const std::vector>& evtSet, event& currEv, + statSort sorter, std::vector statVec = {"-1", "1"} ) + { + for( size_t k = 0 ; k < evtSet.size() ; ++k ) + { + if( evProcComp(*evtSet[k], currEv, statVec, sorter) ){ 
return k; } + } + return evtSet.size(); + } + // ZW: fcn for extracting the subprocess ordering // of LHE file - std::vector>> procOrder( const lheNode& lheFile, const std::vector>& evtSet ) + std::vector>> procOrder( lheNode& lheFile, const std::vector>& evtSet, + std::vector statVec = { "-1", "1" } ) { - const static std::vector pdgVec = { "-1", "1", "-2", "2", "3", "-9" }; - std::vector>> eventBools( evtSet.size()); - std::vector> pracBools( evtSet.size(), std::vector ( lheFile.events.size() )); - for( auto boolSets : pracBools ){ - std::fill( boolSets.begin(), boolSets.end(), false ); + //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector>> eventBools( evtSet.size(), std::make_shared> ( lheFile.getEvents().size() )); + //std::vector> pracBools( evtSet.size(), std::vector ( lheFile.getEvents().size() )); + for( auto boolSets : eventBools ){ + std::fill( boolSets->begin(), boolSets->end(), false ); } - for( auto k = 0 ; k < lheFile.events.size() ; ++k ) + for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) { - auto currProc = lheProc(*lheFile.events[k]); - pracBools[ procPos(evtSet, currProc, pdgVec) ][ k ] = true; + auto currProc = lheProc(*lheFile.getEvents()[k]); + eventBools[ procPos(evtSet, currProc, statVec) ]->at( k ) = true; } - for( int k = 0 ; k < eventBools.size() ; ++k ) + //for( size_t k = 0 ; k < eventBools.size() ; ++k ) + //{ + // eventBools[k] = std::make_shared>( pracBools[k] ); + //} + return eventBools; + } + + std::vector>> evProcOrder( lheNode& lheFile, const std::vector>& evtSet, + std::vector statVec = { "-1", "1" } ) + { + std::vector>> eventBools; + eventBools.reserve(evtSet.size()); + for (size_t i = 0; i < evtSet.size(); ++i) { + eventBools.push_back(std::make_shared>(lheFile.getEvents().size(), false)); + } + for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) { - eventBools[k] = std::make_shared>( pracBools[k] ); + eventBools[ evProcPos(evtSet, *lheFile.getEvents()[k], statVec) ]->at( 
k ) = true; } - return eventBools; + return eventBools; + } + + std::vector>> evProcOrder( lheNode& lheFile, const std::vector>& evtSet, + sortFcn sorter, + std::vector statVec = { "-1", "1" } ) + { + std::vector>> eventBools; + eventBools.reserve(evtSet.size()); + for (size_t i = 0; i < evtSet.size(); ++i) { + eventBools.push_back(std::make_shared>(lheFile.getEvents().size(), false)); + } + for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) + { + eventBools[ evProcPos(evtSet, *lheFile.getEvents()[k], sorter, statVec) ]->at( k ) = true; + } + return eventBools; + } + + std::vector>> evProcOrder( lheNode& lheFile, const std::vector>& evtSet, + statSort sorter, + std::vector statVec = { "-1", "1" } ) + { + std::vector>> eventBools; + eventBools.reserve(evtSet.size()); + for (size_t i = 0; i < evtSet.size(); ++i) { + eventBools.push_back(std::make_shared>(lheFile.getEvents().size(), false)); + } + for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) + { + eventBools[ evProcPos(evtSet, *lheFile.getEvents()[k], sorter, statVec) ]->at( k ) = true; + } + return eventBools; } // ZW: fcn for reordering LHE file based on subprocess - std::shared_ptr>> eventReOrder( const lheNode& lheFile, std::vector relProc ) + std::shared_ptr>> eventReOrder( lheNode& lheFile, std::vector relProc ) { auto reOrdered = std::make_shared>>(); reOrdered->reserve( std::count( relProc.begin(), relProc.end(), true ) ); - for( int k = 0 ; k < relProc.size() ; ++k ) + for( size_t k = 0 ; k < relProc.size() ; ++k ) { if(!relProc[k]){continue;} - reOrdered->push_back( lheFile.events[k] ); + reOrdered->push_back( lheFile.getEvents()[k] ); } return reOrdered; } // ZW: wrapper for eventReOrder - std::vector>>> lheReOrder( const lheNode& lheFile ) + std::vector>>> lheReOrder( lheNode& lheFile, + std::vector statVec = { "-1", "1" } ) + { + auto procSets = processPull( lheFile, statVec ); + auto relProcs = procOrder( lheFile, procSets, statVec ); + std::vector>>> ordProcs(procSets.size()); + 
for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + std::vector statVec = { "-1", "1" } ) + { + auto procSets = evProcessPull( lheFile, statVec ); + auto relProcs = evProcOrder( lheFile, procSets, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + std::vector> procSets, std::vector>> relProcs, + std::vector statVec = { "-1", "1" } ) + { + //auto procSets = evProcessPull( lheFile, statVec ); + //auto relProcs = evProcOrder( lheFile, procSets, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + sortFcn sorter, + std::vector statVec = { "-1", "1" } ) + { + auto procSets = evProcessPull( lheFile, sorter, statVec ); + auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + std::vector> procSets, std::vector>> relProcs, + sortFcn sorter, std::vector statVec = { "-1", "1" } ) + { + //auto procSets = evProcessPull( lheFile, sorter, statVec ); + //auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + statSort sorter, + std::vector statVec = { "-1", "1" } ) + { + auto procSets = 
evProcessPull( lheFile, sorter, statVec ); + auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + std::vector> procSets, std::vector>> relProcs, + statSort sorter, std::vector statVec = { "-1", "1" } ) { - auto procSets = processPull( lheFile ); - auto relProcs = procOrder( lheFile, procSets ); + //auto procSets = evProcessPull( lheFile, sorter, statVec ); + //auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); std::vector>>> ordProcs(procSets.size()); - for( int k = 0 ; k < relProcs.size() ; ++k ) + for( size_t k = 0 ; k < relProcs.size() ; ++k ) { ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); } @@ -2202,10 +3165,11 @@ namespace REX std::vector aQEDs; std::vector aQCDs; std::vector nprts; + std::vector relNPrts; std::vector procIDs; evtInfo( const std::vector>& lheFile = {} ){ int nEvt = lheFile.size(); - wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); procIDs.reserve(nEvt); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); nprts.reserve(nEvt); procIDs.reserve(nEvt); for( auto evt : lheFile ) { wgts.push_back(evt->getHead().getWeight()); @@ -2216,6 +3180,53 @@ namespace REX procIDs.push_back(evt->getHead().getProcID()); } } + evtInfo( const std::vector>& lheFile, const std::vector& statVec ){ + int nEvt = lheFile.size(); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); + for( auto evt : lheFile ) + { + wgts.push_back(evt->getHead().getWeight()); + scales.push_back(evt->getHead().getScale()); + aQEDs.push_back(evt->getHead().getAQED()); + aQCDs.push_back(evt->getHead().getAQCD()); + size_t nPrt = 0; + for( auto stat : statVec ){ nPrt += 
evt->getProc()[stat].size(); } + relNPrts.push_back(nPrt); + procIDs.push_back(evt->getHead().getProcID()); + } + } + evtInfo( const std::vector>& lheFile, const std::vector& statVec, + sortFcn sorter ){ + int nEvt = lheFile.size(); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); + for( auto evt : lheFile ) + { + wgts.push_back(evt->getHead().getWeight()); + scales.push_back(evt->getHead().getScale()); + aQEDs.push_back(evt->getHead().getAQED()); + aQCDs.push_back(evt->getHead().getAQCD()); + size_t nPrt = 0; + for( auto stat : statVec ){ nPrt += evt->getProc(sorter)[stat].size(); } + relNPrts.push_back(nPrt); + procIDs.push_back(evt->getHead().getProcID()); + } + } + evtInfo( const std::vector>& lheFile, const std::vector& statVec, + statSort sorter ){ + int nEvt = lheFile.size(); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); + for( auto evt : lheFile ) + { + wgts.push_back(evt->getHead().getWeight()); + scales.push_back(evt->getHead().getScale()); + aQEDs.push_back(evt->getHead().getAQED()); + aQCDs.push_back(evt->getHead().getAQCD()); + size_t nPrt = 0; + for( auto stat : statVec ){ nPrt += evt->getProc(sorter)[stat].size(); } + relNPrts.push_back(nPrt); + procIDs.push_back(evt->getHead().getProcID()); + } + } }; // ZW: transposed particle information struct @@ -2243,7 +3254,7 @@ namespace REX spins.push_back( prt->getSpin() ); statuses.push_back( prt->getStatus() ); pdgs.push_back( prt->getPDG() ); - for( int k = 0 ; k < 2 ; ++k ) + for( size_t k = 0 ; k < 2 ; ++k ) { moms.push_back( prt->getMom()[k] ); mothers.push_back( prt->getMothers()[k] ); @@ -2253,16 +3264,123 @@ namespace REX } } } - }; - - // ZW: transposed LHE file with a single process type - struct transMonoLHE { - public: - evtInfo evtsHead; - prtInfo evtsData; - transMonoLHE( const std::vector>& lheFile = {}, const int nPrt = 8 
){ + prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ + int nEvt = lheFile.size(); + moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); + spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); + for( auto evt : lheFile ) + { + for( auto stat : statVec ) + { + for( auto i : evt->getProcOrder()[stat] ) + { + auto prt = evt->getPrts()[i]; + moms.push_back( prt->getE() ); + masses.push_back( prt->getMass() ); + vtims.push_back( prt->getVTim() ); + spins.push_back( prt->getSpin() ); + statuses.push_back( prt->getStatus() ); + pdgs.push_back( prt->getPDG() ); + for( size_t k = 0 ; k < 2 ; ++k ) + { + moms.push_back( prt->getMom()[k] ); + mothers.push_back( prt->getMothers()[k] ); + icols.push_back( prt->getColor()[k] ); + } + moms.push_back( prt->getMom()[2] ); + } + } + } + } + prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + sortFcn sorter ){ + int nEvt = lheFile.size(); + moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); + spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); + for( auto evt : lheFile ) + { + for( auto stat : statVec ) + { + for( auto i : evt->getProcOrder(sorter)[stat] ) + { + auto prt = evt->getPrts()[i]; + moms.push_back( prt->getE() ); + masses.push_back( prt->getMass() ); + vtims.push_back( prt->getVTim() ); + spins.push_back( prt->getSpin() ); + statuses.push_back( prt->getStatus() ); + pdgs.push_back( prt->getPDG() ); + for( size_t k = 0 ; k < 2 ; ++k ) + { + moms.push_back( prt->getMom()[k] ); + mothers.push_back( prt->getMothers()[k] ); + icols.push_back( prt->getColor()[k] ); + } + moms.push_back( prt->getMom()[2] ); + } + } + } + } + prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + statSort sorter ){ + int nEvt = 
lheFile.size(); + moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); + spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); + for( auto evt : lheFile ) + { + for( auto stat : statVec ) + { + for( auto i : evt->getProcOrder(sorter)[stat] ) + { + auto prt = evt->getPrts()[i]; + moms.push_back( prt->getE() ); + masses.push_back( prt->getMass() ); + vtims.push_back( prt->getVTim() ); + spins.push_back( prt->getSpin() ); + statuses.push_back( prt->getStatus() ); + pdgs.push_back( prt->getPDG() ); + for( size_t k = 0 ; k < 2 ; ++k ) + { + moms.push_back( prt->getMom()[k] ); + mothers.push_back( prt->getMothers()[k] ); + icols.push_back( prt->getColor()[k] ); + } + moms.push_back( prt->getMom()[2] ); + } + } + } + } + }; + + // ZW: transposed LHE file with a single process type + struct transMonoLHE { + public: + evtInfo evtsHead; + prtInfo evtsData; + std::shared_ptr process; + transMonoLHE( const std::vector>& lheFile = {}, const int nPrt = 8 ){ evtsHead = evtInfo(lheFile); evtsData = prtInfo(lheFile, nPrt); + process = lheFile[0]; + } + transMonoLHE( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ + evtsHead = evtInfo(lheFile, statVec); + evtsData = prtInfo(lheFile, nPrt, statVec); + process = lheFile[0]; + } + transMonoLHE( const std::vector>& lheFile, const int nPrt, + sortFcn sorter, + std::vector statVec = { "-1", "1" } ){ + evtsHead = evtInfo(lheFile, statVec); + evtsData = prtInfo(lheFile, nPrt, statVec, sorter); + process = lheFile[0]; + } + transMonoLHE( const std::vector>& lheFile, const int nPrt, + statSort sorter, + std::vector statVec = { "-1", "1" } ){ + evtsHead = evtInfo(lheFile, statVec); + evtsData = prtInfo(lheFile, nPrt, statVec, sorter); + process = lheFile[0]; } }; @@ -2271,16 +3389,82 @@ namespace REX public: std::string_view xmlFile; std::vector> subProcs; + std::vector> procSets; + std::vector>> relProcs; + 
transLHE(){ return; } transLHE( lheNode& lheFile ) { + procSets = evProcessPull( lheFile ); + relProcs = evProcOrder( lheFile, procSets ); xmlFile = lheFile.getFile(); - auto procsOrdered = lheReOrder( lheFile ); + auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs ); subProcs = std::vector>( procsOrdered.size() ); - for( int k = 0 ; k < procsOrdered.size() ; ++k ) + for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) { subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt() ); } } + transLHE( lheNode& lheFile, + sortFcn sorter, + const std::vector& statVec = { "-1", "1" } ) + { + procSets = evProcessPull( lheFile, sorter, statVec ); + relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + xmlFile = lheFile.getFile(); + auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, sorter, statVec ); + subProcs = std::vector>( procsOrdered.size() ); + for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) + { + subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), sorter, statVec ); + } + } + transLHE( lheNode& lheFile, + statSort sorter, + const std::vector& statVec = { "-1", "1" } ) + { + procSets = evProcessPull( lheFile, sorter, statVec ); + relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + xmlFile = lheFile.getFile(); + auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, sorter, statVec ); + subProcs = std::vector>( procsOrdered.size() ); + for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) + { + subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), sorter, statVec ); + } + } + transLHE( lheNode& lheFile, const std::vector& statVec ) + { + procSets = evProcessPull( lheFile, statVec ); + relProcs = evProcOrder( lheFile, procSets, statVec ); + xmlFile = lheFile.getFile(); + auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, statVec ); + subProcs = std::vector>( procsOrdered.size() ); + for( size_t k = 0 ; k < 
procsOrdered.size() ; ++k ) + { + subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), statVec ); + } + } + /* ZW: flatten per-subprocess vectors back into one vector in original event order; relProcs[k]->at(j) flags whether global event j belongs to subprocess k */ + template + std::shared_ptr> vectorFlat( std::vector>> vecVec ) + { + /* 'continue' is only valid inside a loop -- validate by throwing on mismatch instead */ + if( vecVec.size() != relProcs.size() ) throw std::range_error("vectorFlat: input vector size does not match number of subprocesses"); + for( size_t k = 0 ; k < vecVec.size() ; ++k){ + if( vecVec[k]->size() != relProcs[k]->size() ) throw std::range_error("vectorFlat: input vector size does not match number of events for subprocess"); + } + auto flatVec = std::make_shared>(relProcs[0]->size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ){ + size_t currInd = 0; /* subprocess-local read cursor (was assigned without a declaration) */ + for( size_t j = 0 ; j < relProcs[k]->size() ; ++j ){ + if( relProcs[k]->at(j) ){ + flatVec->at(j) = vecVec[k]->at(currInd); /* write to global slot j, read subprocess-local slot currInd (was at(currInd) on both sides, overwriting earlier subprocesses) */ + ++currInd; + } + } + } + return flatVec; + } + }; + + // ZW: vector transformation string_to_double
xmaxVec.push_back(line->xmaxup); + } + lheDs[currInd] = vecStoD( xmaxVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. * M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + + return lheDos; + } + + std::shared_ptr>>> lheValDoubles(transLHE& lheAOS, lheRetDs vals = lheRetDs() ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + //auto lheAOS = transLHE( lheFile ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( 
lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. * M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + return lheDos; + } + + std::shared_ptr>>> lheValDoubles( lheNode& lheFile, + const std::vector& statVec, lheRetDs vals = lheRetDs() ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, statVec ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } if( boolVec[1] ){ - std::vector xsecVec( lheFile.init->getLines().size() ); - for( auto line : lheFile.init->getLines() ) + std::vector xsecVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) { xsecVec.push_back(line->xsecup); } lheDs[currInd] = vecStoD( xsecVec ); ++currInd; } if( boolVec[2] ){ - std::vector xerrVec( lheFile.init->getLines().size() ); - for( auto line : lheFile.init->getLines() ) + std::vector xerrVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) { xerrVec.push_back(line->xerrup); } lheDs[currInd] = vecStoD( xerrVec ); ++currInd; } if( boolVec[3] ){ - std::vector xmaxVec( 
lheFile.init->getLines().size() ); - for( auto line : lheFile.init->getLines() ) + std::vector xmaxVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) { xmaxVec.push_back(line->xmaxup); } lheDs[currInd] = vecStoD( xmaxVec ); ++currInd; } - for( int k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. * M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + + return lheDos; + } + + std::shared_ptr>>> lheValDoubles( lheNode& lheFile, + sortFcn sorter, + const std::vector& statVec = {"-1", "1"}, lheRetDs vals = lheRetDs() ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, sorter, statVec ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoD( { 
lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } + if( boolVec[1] ){ + std::vector xsecVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xsecVec.push_back(line->xsecup); + } + lheDs[currInd] = vecStoD( xsecVec ); + ++currInd; } + if( boolVec[2] ){ + std::vector xerrVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xerrVec.push_back(line->xerrup); + } + lheDs[currInd] = vecStoD( xerrVec ); + ++currInd; } + if( boolVec[3] ){ + std::vector xmaxVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xmaxVec.push_back(line->xmaxup); + } + lheDs[currInd] = vecStoD( xmaxVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. 
* M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + + return lheDos; + } + + std::shared_ptr>>> lheValDoubles( lheNode& lheFile, + statSort sorter, + const std::vector& statVec = {"-1", "1"}, lheRetDs vals = lheRetDs() ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, sorter, statVec ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } + if( boolVec[1] ){ + std::vector xsecVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xsecVec.push_back(line->xsecup); + } + lheDs[currInd] = vecStoD( xsecVec ); + ++currInd; } + if( boolVec[2] ){ + std::vector xerrVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xerrVec.push_back(line->xerrup); + } + lheDs[currInd] = vecStoD( xerrVec ); + ++currInd; } + if( boolVec[3] ){ + std::vector xmaxVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xmaxVec.push_back(line->xmaxup); + } + lheDs[currInd] = vecStoD( xmaxVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) { if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } if( boolVec[5] 
){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } @@ -2432,20 +3831,20 @@ namespace REX auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); std::vector>> &lheDs = *lheIs; int currInd = 0; - if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->idbmup[0], lheFile.init->getHead()->idbmup[1] } ); ++currInd; } - if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->pdfgup[0], lheFile.init->getHead()->pdfgup[1] } ); ++currInd; } - if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->pdfsup[0], lheFile.init->getHead()->pdfsup[1] } ); ++currInd; } - if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->idwtup } ); ++currInd; } - if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.init->getHead()->nprup } ); ++currInd; } + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } if( boolVec[5] ){ - std::vector lprVec( lheFile.init->getLines().size() ); - for( auto line : lheFile.init->getLines() ) + std::vector lprVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) { lprVec.push_back(line->lprup); } lheDs[currInd] = vecStoI( lprVec ); ++currInd; } - for( int k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) { if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); 
++currInd; } if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } @@ -2456,4 +3855,110 @@ namespace REX } return lheIs; } -} \ No newline at end of file + + std::shared_ptr>>> lheValInts( lheNode& lheFile, std::vector statVec, + lheRetInts vals = lheRetInts() ) + { + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, statVec ); + auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheIs; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } + if( boolVec[5] ){ + std::vector lprVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + lprVec.push_back(line->lprup); + } + lheDs[currInd] = vecStoI( lprVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } + if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); 
++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } + } + return lheIs; + } + + std::shared_ptr>>> lheValInts( lheNode& lheFile, + sortFcn sorter, + std::vector statVec = {"-1", "1"}, lheRetInts vals = lheRetInts() ) + { + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, sorter, statVec ); + auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheIs; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } + if( boolVec[5] ){ + std::vector lprVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + lprVec.push_back(line->lprup); + } + lheDs[currInd] = vecStoI( lprVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } + if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } + if( 
boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } + } + return lheIs; + } + + std::shared_ptr>>> lheValInts( lheNode& lheFile, + statSort sorter, + std::vector statVec = {"-1", "1"}, lheRetInts vals = lheRetInts() ) + { + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, sorter, statVec ); + auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheIs; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } + if( boolVec[5] ){ + std::vector lprVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + lprVec.push_back(line->lprup); + } + lheDs[currInd] = vecStoI( lprVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } + if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } + if( boolVec[11] ){ 
lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } + } + return lheIs; + } +} + +#endif \ No newline at end of file diff --git a/tools/REX/rwgt_driver.cc b/tools/REX/rwgt_driver.cc new file mode 100644 index 0000000000..ee74f097f4 --- /dev/null +++ b/tools/REX/rwgt_driver.cc @@ -0,0 +1,115 @@ +//========================================================================== +// Copyright (C) 2023-2024 CERN +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Written by: Z. Wettersten (Jan 2024) for the MG5aMC CUDACPP plugin. +//========================================================================== +//========================================================================== +// This file has been automatically generated for C++ Standalone by +//%(info_lines)s +//========================================================================== +//========================================================================== +// Driver for reweighting events for processes +//%(multiprocess_lines)s +//-------------------------------------------------------------------------- + +#include "rwgt_instance.h" +#include +#include +//%(include_lines)s + +int usage( char* argv0, int ret = 1 ) +{ + std::cout << "Usage: " << argv0 + << " [--lhefile=\"/YOUR/PATH/HERE\"|-lhe=\"/YOUR/PATH/HERE\"] [--rwgtcard=/YOUR/PATH/HERE|-rwgt=\"/YOUR/PATH/HERE\"]\n" + << "[--output=/YOUR/PATH/HERE\"|-out=\"/YOUR/PATH/HERE\"]\n" << "[--param_card=/YOUR/PATH/HERE\"|-slha=\"/YOUR/PATH/HERE\"]\n"; + std::cout << "\n"; + std::cout << "The LHE file path should be with respect to the directory you are running\n"; + std::cout << "this program from, and similarly the rwgt_card should be as well.\n"; + return ret; +} + + +int main( int argc, char** argv ){ + std::string lheFilePath; + std::string rwgtCardPath; + std::string outputPath; + std::string slhaPath; + + if (argc < 2){ + return usage( argv[0] ); + } + + // READ COMMAND LINE ARGUMENTS + for( int i = 1; 
i <= argc; i++ ) + { + auto currArg = std::string( argv[i] ); + if( currArg.substr(0,9) == "--lhefile" || currArg.substr(0,4) == "-lhe" ) + { + lheFilePath = currArg.substr( currArg.find( "=" ) + 1 ); + } + else if( currArg.substr(0,10) == "--rwgtcard" || currArg.substr(0,5) == "-rwgt" ) + { + rwgtCardPath = currArg.substr( currArg.find( "=" ) + 1 ); + } else if( currArg.substr(0,8) == "--output" || currArg.substr(0,4) == "-out" ){ + outputPath = currArg.substr( currArg.find( "=" ) + 1 ); + } else if (currArg.substr(0,12) == "--param_card" || currArg.substr(0,5) == "-slha" ){ + slhaPath = currArg.substr( currArg.find( "=" ) + 1 ); + } + { + return usage( argv[0] ); + } + } + + if( lheFilePath.empty() || rwgtCardPath.empty() ){ + return usage( argv[0] ); + } + + std::string currPath = argv[0]; + + size_t slashPos = currPath.find_last_of( "/" ); + bool onWindows = false; + if( slashPos == std::string::npos ){ slashPos = currPath.find_last_of( "\\" ); onWindows = true; } + if( slashPos == std::string::npos ) + throw std::runtime_error( "Failed to determine current working directory -- need to know where program is run from to identify where to pull and push param_card.dat." 
); + + if( slhaPath.empty() ){ + if( onWindows ){ + if( currPath.substr( currPath.find_last_of("\\", slashPos - 1) + 1, 2 ) == "P1" ){ + slhaPath = "..\\..\\Cards\\param_card.dat"; + } else{ + slhaPath = "\\Cards\\param_card.dat"; + } + } else { + if( currPath.substr( currPath.find_last_of("/", slashPos - 1) + 1, 2 ) == "P1" ){ + slhaPath = "../../Cards/param_card.dat"; + } else { + slhaPath = "/Cards/param_card.dat"; + } + }} + + + // ZW : include rwgt_instances(s) +//%(rwgt_runners)s + +// std::vector runSet = {%(run_set)s}; + std::vector runSet; + REX::teaw::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); + fileCol.initCards(); + + REX::teaw::ampCall subProcSet; + + for( auto proc : runSet ){ + subProcSet.insert( REX::teaw::ampPair( proc.procEvent, proc.bridgeCall ) ); + } + + //auto bridgeCont = fbridgeRunner( fileCol.getLhe() ); + + //std::function>( std::vector&, std::vector& )> scatteringAmplitude = bridgeCont.scatAmp; + REX::teaw::rwgtRunner driver( fileCol, subProcSet ); + + + driver.runRwgt( outputPath ); + + return 0; + +} \ No newline at end of file diff --git a/tools/REX/rwgt_instance.h b/tools/REX/rwgt_instance.h new file mode 100644 index 0000000000..374810a1aa --- /dev/null +++ b/tools/REX/rwgt_instance.h @@ -0,0 +1,69 @@ +//========================================================================== +// Copyright (C) 2023-2024 CERN +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Written by: Z. Wettersten (Jan 2024) for the MG5aMC CUDACPP plugin. +//========================================================================== +//========================================================================== +// Library including generic functions and classes for event reweighting. +// Process-specific rwgt_runner files are generated by mg5amc@nlo and use +// this library, while the rwgt_driver file is a wrapping program that +// calls the process-specific runners for given subprocesses. 
+//========================================================================== + +#ifndef _RWGT_INSTANCE_H_ +#define _RWGT_INSTANCE_H_ + +#include "teawREX.hpp" + +namespace rwgt{ + + //ZW: Function for calculating the number of remaining events in a warp + // in order to pad the input arrays to a multiple of the warp size + unsigned int warpRemain( unsigned int nEvt, unsigned int nWarp = 32 ){ + return (nWarp - ( nEvt % nWarp )) % nWarp; + } + + //ZW: Function for padding the input arrays to a multiple of the warp size + template + std::shared_ptr> warpPad( std::vector& input, unsigned int nWarp = 32 ){ + auto nEvt = input->size(); + auto nWarpRemain = warpRemain( nEvt, nWarp ); + auto fauxNEvt = nEvt + nWarpRemain; + auto output = std::make_shared>( fauxNEvt ); + std::copy( input.begin(), input.end(), output->begin()); + return output; + } + + struct instance{ + std::vector> procEvent; + REX::event process; + REX::teaw::amplitude bridgeCall; + instance(){} + instance( std::vector>& event){ + this->procEvent = event; + this->process = REX::event( event ); + } + instance( std::vector>& event, REX::teaw::amplitude& amp ){ + this->procEvent = event; + this->process = REX::event( event ); + bridgeCall = amp; + } + void setProc( std::vector>& event ){ + this->procEvent = event; + this->process = REX::event( event ); + } + void setAmp( REX::teaw::amplitude& amp ){ + bridgeCall = amp; + } + std::shared_ptr> ampEval( std::vector& momenta, std::vector& alphaS ){ + return bridgeCall( momenta, alphaS ); + } + std::shared_ptr> ampEval( std::shared_ptr> momenta, + std::shared_ptr> alphaS ){ + return bridgeCall( *momenta, *alphaS ); + } + }; + +} + +#endif \ No newline at end of file diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc new file mode 100644 index 0000000000..8af00324a6 --- /dev/null +++ b/tools/REX/rwgt_runner.cc @@ -0,0 +1,134 @@ +//========================================================================== +// Copyright (C) 2023-2024 CERN +// 
Licensed under the GNU Lesser General Public License (version 3 or later). +// Written by: Z. Wettersten (Jan 2024) for the MG5aMC CUDACPP plugin. +//========================================================================== +//========================================================================== +// This file has been automatically generated for the CUDACPP plugin by +%(info_lines)s +//========================================================================== +//========================================================================== +// A class for reweighting matrix elements for +%(process_lines)s +//-------------------------------------------------------------------------- + +#include "teawREX.hpp" +#include "rwgt_instance.h" +#include "fbridge.cc" + +// ZW: SET UP NAMESPACE +namespace %(process_namespace)s{ +//namespace dummy{ + + struct fbridgeRunner{ + std::vector rndHel; + std::vector rndCol; + std::vector selHel; + std::vector selCol; + CppObjectInFortran *fBridge; + const unsigned int chanId = 0; + const int nMom = 4; + int nWarpRemain; + int nEvt; + int fauxNEvt; + int nPar; + bool setup = false; + fbridgeRunner(){} + fbridgeRunner( REX::event& process ){ + nPar = process.getPrts().size(); + } + void runnerSetup( unsigned int& noEvts, unsigned int warpSize = 32){ + if( setup ){ return; } + nEvt = noEvts; + nWarpRemain = rwgt::warpRemain( nEvt, warpSize ); + fauxNEvt = nEvt + nWarpRemain; + rndHel = std::vector( fauxNEvt, 0. ); + rndCol = std::vector( fauxNEvt, 0. ); + selHel = std::vector( fauxNEvt, 0 ); + selCol = std::vector( fauxNEvt, 0 ); + setup = true; + } + void runnerSetup( std::vector& evVec, unsigned int warpSize = 32){ + if( setup ){ return; } + nEvt = evVec.size(); + nWarpRemain = rwgt::warpRemain( nEvt, warpSize ); + fauxNEvt = nEvt + nWarpRemain; + rndHel = std::vector( fauxNEvt, 0. ); + rndCol = std::vector( fauxNEvt, 0. 
); + selHel = std::vector( fauxNEvt, 0 ); + selCol = std::vector( fauxNEvt, 0 ); + setup = true; + } + void runnerSetup( std::shared_ptr> evVec, unsigned int warpSize = 32){ + if( setup ){ return; } + runnerSetup( *evVec, warpSize ); + } + std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ + runnerSetup( alphaS ); + for( size_t k = 0 ; k < nWarpRemain ; ++k ){ + alphaS.push_back( 0. ); + for( size_t k = 0 ; k < nMom * nPar ; ++k ){ + momenta.push_back( 0. ); + } + } + auto evalScatAmps = std::make_shared>( fauxNEvt ); + fbridgecreate_( &fBridge, &fauxNEvt, &nPar, &nMom ); + fbridgesequence_( &fBridge, &momenta.at(0), &alphaS.at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgedelete_( &fBridge ); + alphaS.resize( nEvt ); + momenta.resize( nEvt * nPar * nMom ); + evalScatAmps->resize( nEvt ); + return evalScatAmps; + } + std::shared_ptr> scatAmp( std::shared_ptr> momenta, std::shared_ptr> alphaS ){ + return scatAmp( *momenta, *alphaS ); + } +#if defined MGONGPU_FPTYPE_FLOAT + std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ + /* size the staging buffers to the full input lengths: momenta holds nEvt*nPar*nMom entries, so sizing both to nEvt (as before) made std::transform write past the end */ + auto nuMom = std::vector( momenta.size() ); + auto nuAlphaS = std::vector( alphaS.size() ); + std::transform( momenta.begin(), momenta.end(), nuMom.begin(), [](double mom){ return static_cast(mom); }); /* was missing the terminating semicolon */ + std::transform( alphaS.begin(), alphaS.end(), nuAlphaS.begin(), [](double gs){ return static_cast(gs); }); + return scatAmp( nuMom, nuAlphaS ); + } +#endif + }; + + std::shared_ptr> thisProcSort( std::string_view& status, std::vector& arguments ){ + std::vector initPrts = %(init_prt_ids)s + std::vector finPrts = %(fin_prt_ids)s +// std::vector initPrts = {"-1"}; +// std::vector finPrts = {"1"}; + if( status == "-1" ){ + return REX::getRefOrder( initPrts, arguments ); + } + else if( status == "1" ){ + return REX::getRefOrder( finPrts, arguments ); + } + return REX::stoiSort( arguments ); + } + +// ZW: SET UP INPUT LHE BLOCK +// ZW: SET UP REX::event FROM LHE BLOCK +// auto 
procEvent = REX::event( procEvent ); + REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; + + std::vector> eventVec = {%(process_event)s}; + REX::event locEv = REX::event( eventVec ); + fbridgeRunner fBridge = fbridgeRunner( locEv ); + + REX::teaw::amplitude scatteringAmp = []( std::vector& momenta, std::vector& alphaS ){ + return fBridge.scatAmp( momenta, alphaS ); + }; + + REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; + + auto runner = rwgt::instance(eventVec, scatteringAmp); + //auto thisProc = runner.getProc( scatteringAmp ); + auto thisProc = runner.process.getProc( currProcSort ); +// ZW: SET UP WRAPPER FOR FORTRAN_BRIDGE + +// ZW: SET UP EVALUATION OF MATRIX ELEMENTS FUNCTION + + +} \ No newline at end of file diff --git a/tools/REX/teawREX.hpp b/tools/REX/teawREX.hpp index 5c2eb2d3cd..e6b2c5f1e3 100644 --- a/tools/REX/teawREX.hpp +++ b/tools/REX/teawREX.hpp @@ -12,19 +12,33 @@ // IF YOU SEE THIS FILE, IT HAS BEEN SPREAD // FROM AN IMPROPER RELEASE. -// Copyright © 2023 CERN, CERN Author Zenny Wettersten. +// Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. // All rights reserved. 
+#ifndef _TEAWREX_HPP_ +#define _TEAWREX_HPP_ + #include #include #include #include #include #include +#include #include "REX.hpp" +#ifndef FORTRANFPTYPE +#define FORTRANFPTYPE double +#endif + namespace REX::teaw { + + using amplitude = std::function>(std::vector&, std::vector&)>; + using ampCall = std::map; + using ampPair = std::pair; + using vecMap = std::map>, REX::eventComp>; + template std::shared_ptr> scatAmpEval(std::vector& momenta, std::function>(std::vector&)> evalFunc) { return evalFunc(momenta); } @@ -89,7 +103,7 @@ namespace REX::teaw { name = title; rwgtVals.resize( values.size() ); - for( int k = 0 ; k < values.size() ; ++k ) + for( size_t k = 0 ; k < values.size() ; ++k ) { rwgtVals[k] = rwgtVal( values[k] ); } @@ -147,7 +161,7 @@ namespace REX::teaw } } rwgtParams.reserve(blocks.size()); - for( int k = 0 ; k < blocks.size() ; ++k ) + for( size_t k = 0 ; k < blocks.size() ; ++k ) { rwgtParams.push_back( rwgtBlock( *params[k], blocks[k] ) ); } @@ -165,7 +179,7 @@ namespace REX::teaw std::shared_ptr outWrite( const REX::lesHouchesCard& paramOrig ){ auto slhaOrig = std::make_shared( paramOrig ); std::map blockIds; - for( int k = 0 ; k < slhaOrig->blocks.size() ; ++k ) + for( size_t k = 0 ; k < slhaOrig->blocks.size() ; ++k ) { slhaOrig->blocks[k].parse( true ); auto nyama = std::pair( slhaOrig->blocks[k].name, k); blockIds.insert( nyama ); } @@ -204,7 +218,7 @@ namespace REX::teaw if( srcCard.find_last_of("#", nuLnch) < srcCard.find_last_of("\n", nuLnch) ){ lnchPos.push_back(nuLnch); } nuLnch = srcCard.find( "launch", nuLnch + 6 ); } - for( int k = 0 ; k < lnchPos.size() - 1 ; ++k ) + for( size_t k = 0 ; k < lnchPos.size() - 1 ; ++k ) { auto strtLi = srcCard.find( "set", lnchPos[k] ); rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( strtLi, lnchPos[k+1] - strtLi ), parseOnline ) ); @@ -289,16 +303,15 @@ namespace REX::teaw lheFile = lhe; lheFileSet = true; } - void setLhe( REX::lheNode lhe ){ + void setLhe( REX::lheNode& lhe ){ if( lheFileSet 
){ return; } setLhe( std::make_shared( lhe ) ); lheFileSet = true; } void setLhe( std::string_view lhe_file ){ if( lheFileSet ){ return; } - size_t strt = 0; - size_t post = *REX::nodeEndFind( lhe_file, strt ); - lheFile = REX::lheParser( lhe_file, strt, post ); + //lheFile = REX::lheParser( lhe_file, strt, post ); + lheFile = std::make_shared( *lheFile ); lheFileSet = true; } std::shared_ptr getRwgt(){ return rwgtSets; } @@ -311,24 +324,33 @@ namespace REX::teaw setRwgt( rwgts ); } protected: - void setDoubles(){ + template + void setDoubles(Args&&... args){ if( lheFile == nullptr || rwgtSets == nullptr || slhaParameters == nullptr ) throw std::runtime_error( "One or more of the necessary files (SLHA parameter card, LHE event storage file, and MadGraph-format reweight card) have not been initialised." ); REX::lheRetDs returnBools; returnBools.xwgtup = true; returnBools.aqcdup = true; returnBools.pup = true; - auto vecOfVecs = REX::lheValDoubles( *lheFile, returnBools ); - if( vecOfVecs->size() != 3 ) - throw std::runtime_error( "LHE file appears to contain multiple types of processes. This has not yet been implemented." ); - wgts = vecOfVecs->at( 0 ); gS = vecOfVecs->at( 1 ); momenta = vecOfVecs->at( 2 ); + eventFile = REX::transLHE( *lheFile, args... ); + auto vecOfVecs = REX::lheValDoubles( eventFile, returnBools ); + if( vecOfVecs->size() != 3 * eventFile.subProcs.size() ) + throw std::runtime_error( "Incorrect number of parameters have been extracted from the LHE file." 
); + //wgts[0] = vecOfVecs->at( 0 ); gS[0] = vecOfVecs->at( 1 ); momenta[0] = vecOfVecs->at( 2 ); + for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + wgts.push_back( vecOfVecs->at( 3*k ) ); + gS.push_back( vecOfVecs->at( 3*k + 1 ) ); + momenta.push_back( vecOfVecs->at( 3*k + 2 ) ); + } } std::shared_ptr rwgtSets; std::shared_ptr slhaParameters; std::shared_ptr lheFile; - std::shared_ptr> wgts; - std::shared_ptr> gS; - std::shared_ptr> momenta; + std::vector>> wgts; + std::vector>> gS; + std::vector>> momenta; bool lheFileSet = false; bool slhaSet = false; bool rwgtSet = false; + REX::transLHE eventFile; }; struct rwgtFiles : rwgtCollection { @@ -341,20 +363,22 @@ namespace REX::teaw setSlhaPath( slha_card ); setLhePath( lhe_card ); } - void initCards(){ + template + void initCards(Args&&... args){ if( rwgtPath == "" || slhaPath == "" || lhePath == "" ) throw std::runtime_error( "Paths to reweight card, parameter card, or LHE file have not been set" ); pullRwgt(); pullSlha(); pullLhe(); setLhe( *lheCard ); setSlha( std::make_shared( *slhaCard ) ); setRwgt( std::make_shared( *rewgtCard, *slhaParameters, true ) ); - setDoubles(); + setDoubles(args...); } - void initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ){ + template + void initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, Args&&... 
args ){ setLhePath( lhe_card ); setSlhaPath( slha_card ); setRwgtPath( reweight_card ); - initCards(); + initCards(args...); } protected: void pullRwgt(){ @@ -376,32 +400,61 @@ namespace REX::teaw struct rwgtRunner : rwgtFiles{ public: - void setMeEval( std::function>(std::vector&, std::vector&)> eval ){ meEval = eval; meInit = true; } + void setMeEval( amplitude eval ){ + meEval = eval; meInit = true; + ampCall nuEvals; + nuEvals.insert( std::pair( *eventFile.subProcs[0]->process, eval ) ); + meEvals = nuEvals; + } + void setMeEvals( ampCall evals ){ meEvals = evals; meCompInit = true; } + void addMeEval( const REX::event& ev, const amplitude& eval ){ meEvals.insert( std::pair( ev, eval ) ); meCompInit = true; } rwgtRunner() : rwgtFiles(){ return; } rwgtRunner( rwgtFiles& rwgts ) : rwgtFiles( rwgts ){ return; } - rwgtRunner( rwgtFiles& rwgts, std::function>(std::vector&, std::vector&)> meCalc ) : rwgtFiles( rwgts ){ + rwgtRunner( rwgtFiles& rwgts, amplitude meCalc ) : rwgtFiles( rwgts ){ meEval = meCalc; meInit = true; } + rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ) : rwgtFiles( rwgts ){ + meEvals = meCalcs; + meCompInit = true; + } rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, - std::function>(std::vector&, std::vector&)> meCalc ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ + amplitude meCalc ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ meEval = meCalc; meInit = true; } + rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + ampCall meCalcs ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ + meEvals = meCalcs; + meCompInit = true; + } + bool oneME(){ return (meInit != meCompInit); } + bool singAmp(){ return (meInit && !meCompInit); } protected: bool meInit = false; + bool meCompInit = false; bool meSet = false; bool normWgtSet = false; - std::function>(std::vector&, std::vector&)> meEval; - std::shared_ptr> initMEs; - std::shared_ptr> 
meNormWgts; + amplitude meEval; + ampCall meEvals; + std::vector>> initMEs; + std::vector>> meNormWgts; + std::shared_ptr> normWgt; std::shared_ptr rwgtGroup; - void setMEs(){ - initCards(); - if( !meInit ) - throw std::runtime_error( "No function for evaluating scattering amplitudes has been provided." ); - auto ins = meEval( *momenta, *gS ); - initMEs = std::make_shared>( ins->begin(), ins->begin() + wgts->size() ); + template + void setMEs(Args&&... args){ + initCards(args...); + if( !oneME() ) + throw std::runtime_error( "No or multiple function(s) for evaluating scattering amplitudes has been provided." ); + //ZW FIX THIS + initMEs = {}; + for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + auto ins = meEvals[eventFile.subProcs[k]]( *(momenta[k]), *(gS[k]) ); + initMEs.push_back( std::make_shared>( ins->begin(), ins->begin() + wgts[k]->size() ) ); + } + //auto ins = meEval( *(momenta[0]), *(gS[0]) ); + //initMEs = {std::make_shared>( ins->begin(), ins->begin() + wgts[0]->size() )}; meSet = true; } bool setParamCard( std::shared_ptr slhaParams ){ @@ -413,14 +466,36 @@ namespace REX::teaw throw std::runtime_error( "Failed to overwrite parameter card." ); return true; } - void setNormWgts(){ - if( !meSet ){ setMEs(); } - if( initMEs->size() != wgts->size() ) - throw std::runtime_error( "Inconsistent number of events and event weights." ); - meNormWgts = std::make_shared>( wgts->size() ); - for( size_t k = 0; k < initMEs->size(); k++ ){ - meNormWgts->at( k ) = wgts->at( k ) / initMEs->at( k ); + void setNormWgtsSingleME(){ + //if( initMEs->size() != wgts[0]->size() ) + // throw std::runtime_error( "Inconsistent number of events and event weights." 
); + meNormWgts = {std::make_shared>( wgts[0]->size() )}; + for( size_t k = 0; k < initMEs[0]->size(); k++ ){ + meNormWgts[0]->at( k ) = wgts[0]->at( k ) / initMEs[0]->at( k ); + } + normWgt = meNormWgts[0]; + } + void setNormWgtsMultiME(){ + meNormWgts = std::vector>>( initMEs.size() ); + for( auto k = 0 ; k < wgts.size() ; ++k ){ + meNormWgts[k] = std::make_shared>( wgts[k]->size() ); + for( auto i = 0 ; i < wgts[k]->size() ; ++i ){ + meNormWgts[k]->at( i ) = wgts[k]->at( i ) / initMEs[k]->at( i ); + } + } + normWgt = eventFile.vectorFlat( meNormWgts ); + } + template + void setNormWgts(Args&&... args){ + if( !oneME() ){ setMEs(args); } + //if( initMEs->size() != wgts[0]->size() ) + // throw std::runtime_error( "Inconsistent number of events and event weights." ); + for( auto k = 0; k < initMEs.size() ; ++k ){ + if( initMEs[k]->size() != wgts[k]->size() ) + throw std::runtime_error( "Inconsistent number of events and event weights." ); } + if( initMEs.size() == 1 ){ setNormWgtsSingleME(); } + else { setNormWgtsMultiME(); } normWgtSet = true; } bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId ){ @@ -428,8 +503,21 @@ namespace REX::teaw throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); if( !setParamCard( slhaParams ) ) throw std::runtime_error( "Failed to rewrite parameter card." 
); - auto newMEs = meEval( *momenta, *gS ); - auto newWGTs = REX::vecElemMult( *newMEs, *meNormWgts ); + std::shared_ptr> newWGTs; + if( singAmp() ){ + auto newMEs = meEval( *momenta[0], *gS[0] ); + newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); + } + else{ + std::vector>> nuMEs = {}; + for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); + } + std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); + newWGTs = REX::vecElemMult( *newMEs, *normWgt ); + } + //ZW IF MULTIPLE TYPES REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); lheFile->addWgt( 0, nuWgt ); return true; @@ -439,8 +527,71 @@ namespace REX::teaw throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); if( !setParamCard( slhaParams ) ) throw std::runtime_error( "Failed to rewrite parameter card." ); - auto newMEs = meEval( *momenta, *gS ); - auto newWGTs = REX::vecElemMult( *newMEs, *meNormWgts ); + std::shared_ptr> newWGTs; + if( singAmp() ){ + auto newMEs = meEval( *momenta[0], *gS[0] ); + newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); + } + else{ + std::vector>> nuMEs = {}; + for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); + } + std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); + newWGTs = REX::vecElemMult( *newMEs, *normWgt ); + } + //ZW IF MULTIPLE TYPES + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); + lheFile->addWgt( 0, nuWgt ); + return true; + } + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, REX::event& ev ){ + if( !normWgtSet ) + throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." 
); + if( !setParamCard( slhaParams ) ) + throw std::runtime_error( "Failed to rewrite parameter card." ); + //auto newMEs = meEval( *momenta, *gS ); + std::shared_ptr> newWGTs; + if( singAmp() ){ + auto newMEs = meEval( *momenta[0], *gS[0] ); + newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); + } + else{ + std::vector>> nuMEs = {}; + for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); + } + std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); + newWGTs = REX::vecElemMult( *newMEs, *normWgt ); + } + //ZW IF MULTIPLE TYPES + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); + lheFile->addWgt( 0, nuWgt ); + return true; + } + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, + std::string& id, REX::event& ev ){ + if( !normWgtSet ) + throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); + if( !setParamCard( slhaParams ) ) + throw std::runtime_error( "Failed to rewrite parameter card." 
); + std::shared_ptr> newWGTs; + if( singAmp() ){ + auto newMEs = meEval( *momenta[0], *gS[0] ); + newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); + } + else{ + std::vector>> nuMEs = {}; + for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); + } + std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); + newWGTs = REX::vecElemMult( *newMEs, *normWgt ); + } + //ZW IF MULTIPLE TYPES REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); lheFile->addWgt( 0, nuWgt ); return true; @@ -456,9 +607,9 @@ namespace REX::teaw setMEs(); setNormWgts(); rwgtGroup = std::make_shared(); - auto currInd = lheFile->header->addWgtGroup( rwgtGroup ); + auto currInd = lheFile->getHeader()->addWgtGroup( rwgtGroup ); auto paramSets = rwgtSets->writeCards( *slhaParameters ); - for( int k = 0 ; k < paramSets.size(); k++ ){ + for( size_t k = 0 ; k < paramSets.size(); k++ ){ singleRwgtIter( paramSets[k], lheFile, k, rwgtSets->rwgtNames[k] ); std::cout << "."; } @@ -467,4 +618,6 @@ namespace REX::teaw std::cout << "\nReweighting done.\n"; } }; -} \ No newline at end of file +} + +#endif \ No newline at end of file From d5933a89aa665a6eec101d05b9e25200db3281ea Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 4 Mar 2024 11:02:16 +0100 Subject: [PATCH 11/76] small fixes to rwgt code --- tools/REX/rwgt_driver.cc | 6 +++--- tools/REX/rwgt_runner.cc | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/REX/rwgt_driver.cc b/tools/REX/rwgt_driver.cc index ee74f097f4..57838a2dd0 100644 --- a/tools/REX/rwgt_driver.cc +++ b/tools/REX/rwgt_driver.cc @@ -5,11 +5,11 @@ //========================================================================== //========================================================================== // This file has been automatically generated for C++ Standalone by -//%(info_lines)s +%(info_lines)s 
//========================================================================== //========================================================================== // Driver for reweighting events for processes -//%(multiprocess_lines)s +%(multiprocess_lines)s //-------------------------------------------------------------------------- #include "rwgt_instance.h" @@ -91,7 +91,7 @@ int main( int argc, char** argv ){ // ZW : include rwgt_instances(s) //%(rwgt_runners)s -// std::vector runSet = {%(run_set)s}; + std::vector runSet = {%(run_set)s}; std::vector runSet; REX::teaw::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); fileCol.initCards(); diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index 8af00324a6..a770bf69aa 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -14,10 +14,10 @@ #include "teawREX.hpp" #include "rwgt_instance.h" -#include "fbridge.cc" // ZW: SET UP NAMESPACE namespace %(process_namespace)s{ +#include "fbridge.cc" //namespace dummy{ struct fbridgeRunner{ From b5827c235100dc4e01476293c012323f2005a56a Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 4 Mar 2024 10:57:28 +0100 Subject: [PATCH 12/76] changed submodule to my fork --- .gitmodules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 1f00c67701..997b366b8f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "MG5aMC/mg5amcnlo"] path = MG5aMC/mg5amcnlo - url = https://github.com/mg5amcnlo/mg5amcnlo - branch = gpucpp + url = https://github.com/zeniheisser/mg5amcnlo + branch = rexCPP From 5433aacf5a96316cb2bb16de479a64b61fa7c17e Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 4 Mar 2024 12:38:38 +0100 Subject: [PATCH 13/76] fixes to get rwgt exporter working --- .../PLUGIN/CUDACPP_SA_OUTPUT/__init__.py | 1 + .../madgraph/iolibs/template_files/REX | 1 + .../iolibs/template_files/gpu/cudacpp_rex.mk | 1040 +++++++++++++++++ .../CUDACPP_SA_OUTPUT/model_handling.py | 85 +- 
.../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 127 ++ tools/REX/rwgt_driver.inc | 1 + tools/REX/rwgt_runner.inc | 1 + 7 files changed, 1253 insertions(+), 3 deletions(-) create mode 120000 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/REX create mode 100644 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk create mode 120000 tools/REX/rwgt_driver.inc create mode 120000 tools/REX/rwgt_runner.inc diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py index d0138a95ac..9014cdebce 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py @@ -35,6 +35,7 @@ new_output = { 'madevent_simd' : output.SIMD_ProcessExporter, 'madevent_gpu' : output.GPU_ProcessExporter, 'standalone_cudacpp' : output.PLUGIN_ProcessExporter, + 'standalone_rwgtcpp' : output.RWGT_ProcessExporter, # the following one are used for the second exporter class # (not really needed so far but interesting if need # specialization in the futur) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/REX b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/REX new file mode 120000 index 0000000000..1a916a1ca1 --- /dev/null +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/REX @@ -0,0 +1 @@ +../../../../../../../../tools/REX/ \ No newline at end of file diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk new file mode 100644 index 0000000000..efe82df88d --- /dev/null +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk @@ -0,0 +1,1040 @@ +# Copyright (C) 2020-2023 CERN and 
UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. + +#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') + +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk + +#------------------------------------------------------------------------------- + +#=== Use bash in the Makefile (https://www.gnu.org/software/make/manual/html_node/Choosing-the-Shell.html) + +SHELL := /bin/bash + +#------------------------------------------------------------------------------- + +#=== Detect O/S and architecture (assuming uname is available, https://en.wikipedia.org/wiki/Uname) + +# Detect O/S kernel (Linux, Darwin...) +UNAME_S := $(shell uname -s) +###$(info UNAME_S='$(UNAME_S)') + +# Detect architecture (x86_64, ppc64le...) 
+UNAME_P := $(shell uname -p) +###$(info UNAME_P='$(UNAME_P)') + +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../../Source/make_opts),) +include ../../Source/make_opts +endif + +#------------------------------------------------------------------------------- + +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +MG5AMC_COMMONLIB = mg5amc_common +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) +override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) +override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) +override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else +override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = 
../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) +TESTDIR = +else ifneq ($(LOCALGTEST),) +TESTDIR=$(TESTDIRLOCAL) +GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) +TESTDIR = $(TESTDIRCOMMON) +GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else +TESTDIR = +endif +ifneq ($(GTEST_ROOT),) +GTESTLIBDIR = $(GTEST_ROOT)/lib64/ +GTESTLIBS = $(GTESTLIBDIR)/libgtest.a $(GTESTLIBDIR)/libgtest_main.a +GTESTINC = -I$(GTEST_ROOT)/include +else +GTESTLIBDIR = +GTESTLIBS = +GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + +#=== Configure the C++ compiler + +CXXFLAGS = $(OPTFLAGS) -std=c++17 $(INCFLAGS) -Wall -Wshadow -Wextra +ifeq ($(shell $(CXX) --version | grep ^nvc++),) +CXXFLAGS += -ffast-math # see issue #117 +endif +###CXXFLAGS+= -Ofast # performance is not different from --fast-math +###CXXFLAGS+= -g # FOR DEBUGGING ONLY + +# Optionally add debug flags to display the full list of flags (eg on Darwin) +###CXXFLAGS+= -v + +# Note: AR, CXX and FC are implicitly defined if not set externally +# See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html + +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + +#------------------------------------------------------------------------------- + +#=== Configure the GPU compiler (CUDA or HIP) + +# FIXME! (AV 24.01.2024) +# In the current implementation (without separate builds for C++ and CUDA/HIP), we first check for cudacc and hipcc in CUDA_HOME and HIP_HOME. +# If CUDA_HOME or HIP_HOME are not set, try to determine them from the path to cudacc and hipcc. 
+# While convoluted, this is currently necessary to allow disabling CUDA/HIP builds by setting CUDA_HOME or HIP_HOME to invalid paths. +# This will (probably?) be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775). + +# If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA builds (issue #505) +# This is because it is impossible to pass this to "GPUFLAGS += -ccbin " below +ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache " from outside + $(warning CUDA builds are not supported for multi-word CXX "$(CXX)") + override CUDA_HOME=disabled +endif + +# If CUDA_HOME is not set, try to set it from the path to nvcc +ifndef CUDA_HOME + CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null)) + $(warning CUDA_HOME was not set: using "$(CUDA_HOME)") +endif + +# If HIP_HOME is not set, try to set it from the path to hipcc +ifndef HIP_HOME + HIP_HOME = $(patsubst %%/bin/hipcc,%%,$(shell which hipcc 2>/dev/null)) + $(warning HIP_HOME was not set: using "$(HIP_HOME)") +endif + +# FIXME! (AV 24.01.2024) +# In the current implementation (without separate builds for C++ and CUDA/HIP), +# builds are performed for HIP only if CUDA is not found in the path. +# If both CUDA and HIP are installed, HIP builds can be triggered by unsetting CUDA_HOME. +# This will be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775). + +#--- Option 1: CUDA exists -> use CUDA + +# Set GPUCC as $(CUDA_HOME)/bin/nvcc if it exists +ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) + + GPUCC = $(CUDA_HOME)/bin/nvcc + USE_NVTX ?=-DUSE_NVTX + # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html + # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). + # Embed device code for 70, and PTX for 70+. 
+ # Export MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to use another value or list of values (see #533). + # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). + MADGRAPH_CUDA_ARCHITECTURE ?= 70 + ###CUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 + ###CUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 + comma:=, + CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + CUINC = -I$(CUDA_HOME)/include/ + CUOPTFLAGS = -lineinfo + ###GPUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math + GPUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math + ###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow + ###GPUCC_VERSION = $(shell $(GPUCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1) + GPUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h + # Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 96 # degrades throughput: 4.1E8 (16384 32 12) up to 4.5E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 64 # degrades throughput: 
1.7E8 (16384 32 12) flat at 1.7E8 (65536 128 12) + CUBUILDRULEFLAGS = -Xcompiler -fPIC -c + CCBUILDRULEFLAGS = -Xcompiler -fPIC -c -x cu + CUDATESTFLAGS = -lcuda + + # Set the host C++ compiler for GPUCC via "-ccbin " + # (NB issue #505: this must be a single word, "clang++ --gcc-toolchain..." is not supported) + GPUFLAGS += -ccbin $(shell which $(subst ccache ,,$(CXX))) + + # Allow newer (unsupported) C++ compilers with older versions of CUDA if ALLOW_UNSUPPORTED_COMPILER_IN_CUDA is set (#504) + ifneq ($(origin ALLOW_UNSUPPORTED_COMPILER_IN_CUDA),undefined) + GPUFLAGS += -allow-unsupported-compiler + endif + +else ifneq ($(origin REQUIRE_CUDA),undefined) + + # If REQUIRE_CUDA is set but no cuda is found, stop here (e.g. for CI tests on GPU #443) + $(error No cuda installation found (set CUDA_HOME or make GPUCC visible in PATH)) + +#--- Option 2: CUDA does not exist, HIP exists -> use HIP + +# Set GPUCC as $(HIP_HOME)/bin/hipcc if it exists +else ifneq ($(wildcard $(HIP_HOME)/bin/hipcc),) + + GPUCC = $(HIP_HOME)/bin/hipcc + #USE_NVTX ?=-DUSE_NVTX # should maybe find something equivalent to this in HIP? + HIPARCHFLAGS = -target x86_64-linux-gnu --offload-arch=gfx90a + HIPINC = -I$(HIP_HOME)/include/ + # Note: -DHIP_FAST_MATH is equivalent to -use_fast_math in HIP + # (but only for single precision line 208: https://rocm-developer-tools.github.io/HIP/hcc__detail_2math__functions_8h_source.html) + # Note: CUOPTFLAGS should not be used for HIP, it had been added here but was then removed (#808) + GPUFLAGS = $(OPTFLAGS) $(INCFLAGS) $(HIPINC) $(HIPARCHFLAGS) -DHIP_FAST_MATH -DHIP_PLATFORM=amd -fPIC + ###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow + GPUFLAGS += -std=c++17 + ###GPUFLAGS+= --maxrregcount 255 # (AV: is this option valid on HIP and meaningful on AMD GPUs?) + CUBUILDRULEFLAGS = -fPIC -c + CCBUILDRULEFLAGS = -fPIC -c -x hip + +else ifneq ($(origin REQUIRE_HIP),undefined) + + # If REQUIRE_HIP is set but no HIP is found, stop here (e.g. 
for CI tests on GPU #443) + $(error No hip installation found (set HIP_HOME or make GPUCC visible in PATH)) + +#--- Option 3: CUDA does not exist, HIP does not exist -> switch off both CUDA and HIP + +else + + # No cudacc and no hipcc: switch CUDA and HIP compilation off and go to common random numbers in C++ + $(warning CUDA_HOME is not set or is invalid: export CUDA_HOME to compile with cuda) + $(warning HIP_HOME is not set or is invalid: export HIP_HOME to compile with hip) + override GPUCC= + override USE_NVTX= + override CUINC= + override HIPINC= + +endif + +# Export GPUCC (so that it can also be used in cudacpp_src.mk?) +export GPUCC +export GPUFLAGS + +#------------------------------------------------------------------------------- + +#=== Configure ccache for C++ and CUDA/HIP builds + +# Enable ccache if USECCACHE=1 +ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) + override CXX:=ccache $(CXX) +endif +#ifeq ($(USECCACHE)$(shell echo $(AR) | grep ccache),1) +# override AR:=ccache $(AR) +#endif +ifneq ($(GPUCC),) + ifeq ($(USECCACHE)$(shell echo $(GPUCC) | grep ccache),1) + override GPUCC:=ccache $(GPUCC) + endif +endif + +#------------------------------------------------------------------------------- + +#=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP + +# PowerPC-specific CXX compiler flags (being reviewed) +ifeq ($(UNAME_P),ppc64le) + CXXFLAGS+= -mcpu=power9 -mtune=power9 # gains ~2-3%% both for none and sse4 + # Throughput references without the extra flags below: none=1.41-1.42E6, sse4=2.15-2.19E6 + ###CXXFLAGS+= -DNO_WARN_X86_INTRINSICS # no change + ###CXXFLAGS+= -fpeel-loops # no change + ###CXXFLAGS+= -funroll-loops # gains ~1%% for none, loses ~1%% for sse4 + ###CXXFLAGS+= -ftree-vectorize # no change + ###CXXFLAGS+= -flto # would increase to none=4.08-4.12E6, sse4=4.99-5.03E6! +else + ###CXXFLAGS+= -flto # also on Intel this would increase throughputs by a factor 2 to 4... 
+ ######CXXFLAGS+= -fno-semantic-interposition # no benefit (neither alone, nor combined with -flto) +endif + +# PowerPC-specific CUDA/HIP compiler flags (to be reviewed!) +ifeq ($(UNAME_P),ppc64le) + GPUFLAGS+= -Xcompiler -mno-float128 +endif + +#------------------------------------------------------------------------------- + +#=== Configure defaults and check if user-defined choices exist for OMPFLAGS, AVX, FPTYPE, HELINL, HRDCOD + +# Set the default OMPFLAGS choice +ifneq ($(findstring hipcc,$(GPUCC)),) +override OMPFLAGS = # disable OpenMP MT when using hipcc #802 +else ifneq ($(shell $(CXX) --version | egrep '^Intel'),) +override OMPFLAGS = -fopenmp +###override OMPFLAGS = # disable OpenMP MT on Intel (was ok without GPUCC but not ok with GPUCC before #578) +else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) +override OMPFLAGS = -fopenmp +###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) +else +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) +endif + +# Set the default AVX (vectorization) choice +ifeq ($(AVX),) + ifeq ($(UNAME_P),ppc64le) + ###override AVX = none + override AVX = sse4 + else ifeq ($(UNAME_P),arm) + ###override AVX = none + override AVX = sse4 + else ifeq ($(wildcard /proc/cpuinfo),) + override AVX = none + $(warning Using AVX='$(AVX)' because host SIMD features cannot be read from /proc/cpuinfo) + else ifeq ($(shell grep -m1 -c avx512vl /proc/cpuinfo)$(shell $(CXX) --version | grep ^clang),1) + override AVX = 512y + ###$(info Using AVX='$(AVX)' as no user input exists) + else + override AVX = avx2 + ifneq ($(shell grep -m1 -c avx512vl /proc/cpuinfo),1) + $(warning Using AVX='$(AVX)' because host does not support avx512vl) + else + $(warning Using AVX='$(AVX)' because this is faster than avx512vl for clang) + endif + endif +else + ###$(info Using AVX='$(AVX)' according to user input) +endif + +# Set the default FPTYPE (floating point type) choice +ifeq ($(FPTYPE),) + override FPTYPE = d +endif + +# Set the default HELINL (inline helicities?) choice +ifeq ($(HELINL),) + override HELINL = 0 +endif + +# Set the default HRDCOD (hardcode cIPD physics parameters?) 
choice +ifeq ($(HRDCOD),) + override HRDCOD = 0 +endif + +# Export AVX, FPTYPE, HELINL, HRDCOD, OMPFLAGS so that it is not necessary to pass them to the src Makefile too +export AVX +export FPTYPE +export HELINL +export HRDCOD +export OMPFLAGS + +#------------------------------------------------------------------------------- + +#=== Configure defaults and check if user-defined choices exist for RNDGEN (legacy!), HASCURAND, HASHIPRAND + +# If the legacy RNDGEN exists, this take precedence over any HASCURAND choice (but a warning is printed out) +###$(info RNDGEN=$(RNDGEN)) +ifneq ($(RNDGEN),) + $(warning Environment variable RNDGEN is no longer supported, please use HASCURAND instead!) + ifeq ($(RNDGEN),hasCurand) + override HASCURAND = $(RNDGEN) + else ifeq ($(RNDGEN),hasNoCurand) + override HASCURAND = $(RNDGEN) + else ifneq ($(RNDGEN),hasNoCurand) + $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported - but use HASCURAND instead!) + endif +endif + +# Set the default HASCURAND (curand random number generator) choice, if no prior choice exists for HASCURAND +# (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) +ifeq ($(HASCURAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASCURAND = hasNoCurand + else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + override HASCURAND = hasCurand + else # non-Nvidia GPU build + override HASCURAND = hasNoCurand + endif +endif + +# Set the default HASHIPRAND (hiprand random number generator) choice, if no prior choice exists for HASHIPRAND +# (NB: allow HASHIPRAND=hasHiprand even if $(GPUCC) does not point to hipcc: assume HIP_HOME was defined correctly...) 
+ifeq ($(HASHIPRAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASHIPRAND = hasNoHiprand + else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build + override HASHIPRAND = hasHiprand + else # non-AMD GPU build + override HASHIPRAND = hasNoHiprand + endif +endif + +# Export HASCURAND, HASHIPRAND so that it is not necessary to pass them to the src Makefile too +# (NB: these variables in cudacpp_src.mk are only used to define the build tag, they are NOT needed for RNDCXXFLAGS or RNDLIBFLAGS) +export HASCURAND +export HASHIPRAND + +#------------------------------------------------------------------------------- + +#=== Set the CUDA/HIP/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD + +# Set the build flags appropriate to OMPFLAGS +$(info OMPFLAGS=$(OMPFLAGS)) +CXXFLAGS += $(OMPFLAGS) + +# Set the build flags appropriate to each AVX choice (example: "make AVX=none") +# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro] +# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] +$(info AVX=$(AVX)) +ifeq ($(UNAME_P),ppc64le) + ifeq ($(AVX),sse4) + override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers) + else ifneq ($(AVX),none) + $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on PowerPC for the moment) + endif +else ifeq ($(UNAME_P),arm) + ifeq ($(AVX),sse4) + override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) + else ifneq ($(AVX),none) + $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on ARM for the moment) + endif +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 + ifeq ($(AVX),none) + override AVXFLAGS = -mno-sse3 # no SIMD + else ifeq ($(AVX),sse4) + override AVXFLAGS = -mno-avx # SSE4.2 with 128 width (xmm registers) + else ifeq ($(AVX),avx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq 
($(AVX),512y) + override AVXFLAGS = -march=skylake -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(AVX),512z) + override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + else + $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) + endif +else + ifeq ($(AVX),none) + override AVXFLAGS = -march=x86-64 # no SIMD (see #588) + else ifeq ($(AVX),sse4) + override AVXFLAGS = -march=nehalem # SSE4.2 with 128 width (xmm registers) + else ifeq ($(AVX),avx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq ($(AVX),512y) + override AVXFLAGS = -march=skylake-avx512 -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(AVX),512z) + override AVXFLAGS = -march=skylake-avx512 -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + else + $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) + endif +endif +# For the moment, use AVXFLAGS everywhere: eventually, use them only in encapsulated implementations? 
+CXXFLAGS+= $(AVXFLAGS) + +# Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") +$(info FPTYPE=$(FPTYPE)) +ifeq ($(FPTYPE),d) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE +else ifeq ($(FPTYPE),f) + CXXFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT +else ifeq ($(FPTYPE),m) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT +else + $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported) +endif + +# Set the build flags appropriate to each HELINL choice (example: "make HELINL=1") +$(info HELINL=$(HELINL)) +ifeq ($(HELINL),1) + CXXFLAGS += -DMGONGPU_INLINE_HELAMPS + GPUFLAGS += -DMGONGPU_INLINE_HELAMPS +else ifneq ($(HELINL),0) + $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported) +endif + +# Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1") +$(info HRDCOD=$(HRDCOD)) +ifeq ($(HRDCOD),1) + CXXFLAGS += -DMGONGPU_HARDCODE_PARAM + GPUFLAGS += -DMGONGPU_HARDCODE_PARAM +else ifneq ($(HRDCOD),0) + $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) +endif + + +#=== Set the CUDA/HIP/C++ compiler and linker flags appropriate to user-defined choices of HASCURAND, HASHIPRAND + +$(info HASCURAND=$(HASCURAND)) +$(info HASHIPRAND=$(HASHIPRAND)) +override RNDCXXFLAGS= +override RNDLIBFLAGS= + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASCURAND choice (example: "make HASCURAND=hasNoCurand") +ifeq ($(HASCURAND),hasNoCurand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_CURAND +else ifeq ($(HASCURAND),hasCurand) + override RNDLIBFLAGS += -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+else + $(error Unknown HASCURAND='$(HASCURAND)': only 'hasCurand' and 'hasNoCurand' are supported) +endif + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASHIPRAND choice (example: "make HASHIPRAND=hasNoHiprand") +ifeq ($(HASHIPRAND),hasNoHiprand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_HIPRAND +else ifeq ($(HASHIPRAND),hasHiprand) + override RNDLIBFLAGS += -L$(HIP_HOME)/lib/ -lhiprand +else ifneq ($(HASHIPRAND),hasHiprand) + $(error Unknown HASHIPRAND='$(HASHIPRAND)': only 'hasHiprand' and 'hasNoHiprand' are supported) +endif + +#$(info RNDCXXFLAGS=$(RNDCXXFLAGS)) +#$(info HASHIPRAND=$(HASHIPRAND)) + +#------------------------------------------------------------------------------- + +#=== Configure build directories and build lockfiles === + +# Build directory "short" tag (defines target and path to the optional build directory) +# (Rationale: keep directory names shorter, e.g. do not include random number generator choice) +override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) + +# Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) +# (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) +override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASHIPRAND) + +# Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 +ifeq ($(USEBUILDDIR),1) + override BUILDDIR = build.$(DIRTAG) + override LIBDIR = ../../lib/$(BUILDDIR) + override LIBDIRRPATH = '$$ORIGIN/../$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is set = 1)) +else + override BUILDDIR = . 
+ override LIBDIR = ../../lib + override LIBDIRRPATH = '$$ORIGIN/$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is not set)) +endif +###override INCDIR = ../../include +###$(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG)) + +# On Linux, set rpath to LIBDIR to make it unnecessary to use LD_LIBRARY_PATH +# Use relative paths with respect to the executables or shared libraries ($ORIGIN on Linux) +# On Darwin, building libraries with absolute paths in LIBDIR makes this unnecessary +ifeq ($(UNAME_S),Darwin) + override CXXLIBFLAGSRPATH = + override CULIBFLAGSRPATH = + override CXXLIBFLAGSRPATH2 = + override CULIBFLAGSRPATH2 = +else + # RPATH to cuda/cpp libs when linking executables + override CXXLIBFLAGSRPATH = -Wl,-rpath=$(LIBDIRRPATH) + override CULIBFLAGSRPATH = -Xlinker -rpath=$(LIBDIRRPATH) + # RPATH to common lib when linking cuda/cpp libs + override CXXLIBFLAGSRPATH2 = -Wl,-rpath='$$ORIGIN' + override CULIBFLAGSRPATH2 = -Xlinker -rpath='$$ORIGIN' +endif + +# Setting LD_LIBRARY_PATH or DYLD_LIBRARY_PATH in the RUNTIME is no longer necessary (neither on Linux nor on Mac) +override RUNTIME = + +#=============================================================================== +#=== Makefile TARGETS and build rules below +#=============================================================================== + +cxx_main=$(BUILDDIR)/check.exe +fcxx_main=$(BUILDDIR)/fcheck.exe + +ifneq ($(GPUCC),) +cu_main=$(BUILDDIR)/gcheck.exe +fcu_main=$(BUILDDIR)/fgcheck.exe +else +cu_main= +fcu_main= +endif + +testmain=$(BUILDDIR)/runTest.exe + +ifneq ($(GTESTLIBS),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) $(testmain) +else +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) +endif + +# Target (and build options): debug +MAKEDEBUG= +debug: OPTFLAGS = -g -O0 +debug: CUOPTFLAGS = -G +debug: MAKEDEBUG 
:= debug +debug: all.$(TAG) + +# Target: tag-specific build lockfiles +override oldtagsb=`if [ -d $(BUILDDIR) ]; then find $(BUILDDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)' -exec echo $(shell pwd)/{} \; ; fi` +$(BUILDDIR)/.build.$(TAG): + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi + @touch $(BUILDDIR)/.build.$(TAG) + +# Generic target and build rules: objects from CUDA or HIP compilation +# NB: CCBUILDRULEFLAGS includes "-x cu" for nvcc and "-x hip" for hipcc (#810) +ifneq ($(GPUCC),) +$(BUILDDIR)/%%.o : %%.cu *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(GPUCC) $(CPPFLAGS) $(GPUFLAGS) $(CUBUILDRULEFLAGS) $< -o $@ + +$(BUILDDIR)/%%_cu.o : %%.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(GPUCC) $(CPPFLAGS) $(GPUFLAGS) $(CCBUILDRULEFLAGS) $< -o $@ +endif + +# Generic target and build rules: objects from C++ compilation +# (NB do not include CUINC here! add it only for NVTX or curand #679) +$(BUILDDIR)/%%.o : %%.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -fPIC -c $< -o $@ + +# Apply special build flags only to CrossSectionKernel[_cu].o (no fast math, see #117 and #516) +# Added edgecase for HIP compilation +ifeq ($(shell $(CXX) --version | grep ^nvc++),) +$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math +ifeq ($(findstring nvcc,$(GPUCC)),nvcc) + $(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -Xcompiler -fno-fast-math +else + $(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -fno-fast-math +endif +endif + +# Apply special build flags only to check_sa[_cu].o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) +$(BUILDDIR)/check_sa_cu.o: CXXFLAGS += $(USE_NVTX) $(CUINC) + +# Apply special build flags only to check_sa[_cu].o and (Cu|Hip)randRandomNumberKernel[_cu].o +$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/check_sa_cu.o: CUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel_cu.o: CUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cu.o: CUFLAGS += $(RNDCXXFLAGS) +ifeq ($(HASCURAND),hasCurand) # curand headers, #679 +$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) +endif +ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers +$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIPINC) +endif + +# Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... 
instead" in GPUCC with icx2023 (#592) +ifneq ($(shell $(CXX) --version | egrep '^(Intel)'),) +ifneq ($(GPUCC),) +GPUFLAGS += -Wno-deprecated-builtins +endif +endif + +# Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) +# This patch does remove the warning, but I prefer to keep it disabled for the moment... +###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) +###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###ifneq ($(GPUCC),) +###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -Xcompiler -Wno-overriding-t-option +###endif +###endif + +#### Apply special build flags only to CPPProcess.o (-flto) +###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto + +#### Apply special build flags only to CPPProcess.o (AVXFLAGS) +###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) + +#------------------------------------------------------------------------------- + +# Target (and build rules): common (src) library +commonlib : $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so + +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc $(BUILDDIR)/.build.$(TAG) + $(MAKE) -C ../../src $(MAKEDEBUG) -f $(CUDACPP_SRC_MAKEFILE) + +#------------------------------------------------------------------------------- + +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp +cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o + +ifneq ($(GPUCC),) +MG5AMC_CULIB = mg5amc_$(processid_short)_cuda +cu_objects_lib=$(BUILDDIR)/CPPProcess_cu.o $(BUILDDIR)/MatrixElementKernels_cu.o $(BUILDDIR)/BridgeKernels_cu.o $(BUILDDIR)/CrossSectionKernels_cu.o +cu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cu.o 
$(BUILDDIR)/RamboSamplingKernels_cu.o +endif + +# Target (and build rules): C++ and CUDA shared libraries +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) + $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) + +ifneq ($(GPUCC),) +$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o +$(LIBDIR)/lib$(MG5AMC_CULIB).so: cu_objects_lib += $(BUILDDIR)/fbridge_cu.o +$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_objects_lib) + $(GPUCC) --shared -o $@ $(cu_objects_lib) $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# Bypass std::filesystem completely to ease portability on LUMI #803 +#ifneq ($(findstring hipcc,$(GPUCC)),) +# $(GPUCC) --shared -o $@ $(cu_objects_lib) $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs +#else +# $(GPUCC) --shared -o $@ $(cu_objects_lib) $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +#endif +endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): Fortran include files +###$(INCDIR)/%%.inc : ../%%.inc +### @if [ ! 
-d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi +### \cp $< $@ + +#------------------------------------------------------------------------------- + +# Target (and build rules): C++ and CUDA standalone executables +$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o + $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) + +ifneq ($(GPUCC),) +ifneq ($(shell $(CXX) --version | grep ^Intel),) +$(cu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(cu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 +$(cu_main): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +endif +$(cu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cu_main): $(BUILDDIR)/check_sa_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o + $(GPUCC) -o $@ $(BUILDDIR)/check_sa_cu.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(RNDLIBFLAGS) +endif + +#------------------------------------------------------------------------------- + +# Generic target and build rules: objects from Fortran compilation +$(BUILDDIR)/%%.o : %%.f *.inc + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(FC) -I. 
-c $< -o $@ + +# Generic target and build rules: objects from Fortran compilation +###$(BUILDDIR)/%%.o : %%.f *.inc +### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi +### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi +### $(FC) -I. -I$(INCDIR) -c $< -o $@ + +# Target (and build rules): Fortran standalone executables +###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc + +ifeq ($(UNAME_S),Darwin) +$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +endif +$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ +else + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) +endif + +ifneq ($(GPUCC),) +ifneq ($(shell $(CXX) --version | grep ^Intel),) +$(fcu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(fcu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +endif +ifeq ($(UNAME_S),Darwin) +$(fcu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +endif +$(fcu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(fcu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) +ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 + 
$(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 +else + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) +endif +endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): test objects and test executable +$(BUILDDIR)/testxxx.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt +$(testmain): $(BUILDDIR)/testxxx.o +$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions + +ifneq ($(GPUCC),) +$(BUILDDIR)/testxxx_cu.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cu.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cu.o: testxxx_cc_ref.txt +$(testmain): $(BUILDDIR)/testxxx_cu.o +$(testmain): cu_objects_exe += $(BUILDDIR)/testxxx_cu.o # Comment out this line to skip the CUDA test of xxx functions +endif + +$(BUILDDIR)/testmisc.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) +$(testmain): $(BUILDDIR)/testmisc.o +$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests + +ifneq ($(GPUCC),) +$(BUILDDIR)/testmisc_cu.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cu.o: INCFLAGS += $(GTESTINC) +$(testmain): $(BUILDDIR)/testmisc_cu.o +$(testmain): cu_objects_exe += $(BUILDDIR)/testmisc_cu.o # Comment out this line to skip the CUDA miscellaneous tests +endif + +$(BUILDDIR)/runTest.o: $(GTESTLIBS) +$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) +$(testmain): $(BUILDDIR)/runTest.o +$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o + +ifneq ($(GPUCC),) +$(BUILDDIR)/runTest_cu.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cu.o: INCFLAGS += $(GTESTINC) +ifneq ($(shell $(CXX) --version | grep ^Intel),) 
+$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 +$(testmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +endif +$(testmain): $(BUILDDIR)/runTest_cu.o +$(testmain): cu_objects_exe += $(BUILDDIR)/runTest_cu.o +endif + +$(testmain): $(GTESTLIBS) +$(testmain): INCFLAGS += $(GTESTINC) +$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -lgtest_main + +ifneq ($(OMPFLAGS),) +ifneq ($(shell $(CXX) --version | egrep '^Intel'),) +$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) +$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +###$(testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +else +$(testmain): LIBFLAGS += -lgomp +endif +endif + +# Bypass std::filesystem completely to ease portability on LUMI #803 +#ifneq ($(findstring hipcc,$(GPUCC)),) +#$(testmain): LIBFLAGS += -lstdc++fs +#endif + +ifeq ($(GPUCC),) # link only runTest.o +$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) + $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) +else # link both runTest.o and runTest_cu.o +$(testmain): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) $(GTESTLIBS) +ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 + $(FC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) $(CUDATESTFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 +else + $(GPUCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) $(CUDATESTFLAGS) +endif +endif + +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + +# Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 +$(GTESTLIBS): +ifneq ($(shell which flock 2>/dev/null),) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) +else + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi +endif + +#------------------------------------------------------------------------------- + +# Target: build all targets in all AVX modes (each AVX mode in a separate build directory) +# Split the avxall target into five separate targets to allow parallel 'make -j avxall' builds +# (Hack: add a fbridge.inc dependency to avxall, to ensure it is only copied once for all AVX modes) +avxnone: + @echo + $(MAKE) USEBUILDDIR=1 AVX=none -f $(CUDACPP_MAKEFILE) + +avxsse4: + @echo + $(MAKE) USEBUILDDIR=1 AVX=sse4 -f $(CUDACPP_MAKEFILE) + +avxavx2: + @echo + $(MAKE) USEBUILDDIR=1 AVX=avx2 -f $(CUDACPP_MAKEFILE) + +avx512y: + @echo + $(MAKE) USEBUILDDIR=1 AVX=512y -f $(CUDACPP_MAKEFILE) + +avx512z: + @echo + $(MAKE) USEBUILDDIR=1 AVX=512z -f $(CUDACPP_MAKEFILE) + +ifeq ($(UNAME_P),ppc64le) +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 +avxall: avxnone avxsse4 +else ifeq ($(UNAME_P),arm) +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 +avxall: avxnone avxsse4 +else +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 avxavx2 avx512y avx512z +avxall: avxnone avxsse4 avxavx2 avx512y avx512z +endif + +#------------------------------------------------------------------------------- + +# Target: clean the builds +.PHONY: clean + +clean: +ifeq ($(USEBUILDDIR),1) + rm -rf $(BUILDDIR) +else + rm -f $(BUILDDIR)/.build.* $(BUILDDIR)/*.o $(BUILDDIR)/*.exe + rm -f $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(LIBDIR)/lib$(MG5AMC_CULIB).so +endif + $(MAKE) -C ../../src clean -f $(CUDACPP_SRC_MAKEFILE) +### rm -rf $(INCDIR) + +cleanall: + @echo + $(MAKE) USEBUILDDIR=0 clean -f $(CUDACPP_MAKEFILE) + @echo + $(MAKE) USEBUILDDIR=0 -C ../../src cleanall -f $(CUDACPP_SRC_MAKEFILE) + rm -rf build.* + +# Target: clean the builds as well as the gtest installation(s) +distclean: cleanall +ifneq ($(wildcard 
$(TESTDIRCOMMON)),) + $(MAKE) -C $(TESTDIRCOMMON) clean +endif + $(MAKE) -C $(TESTDIRLOCAL) clean + +#------------------------------------------------------------------------------- + +# Target: show system and compiler information +info: + @echo "" + @uname -spn # e.g. Linux nodename.cern.ch x86_64 +ifeq ($(UNAME_S),Darwin) + @sysctl -a | grep -i brand + @sysctl -a | grep machdep.cpu | grep features || true + @sysctl -a | grep hw.physicalcpu: + @sysctl -a | grep hw.logicalcpu: +else + @cat /proc/cpuinfo | grep "model name" | sort -u + @cat /proc/cpuinfo | grep "flags" | sort -u + @cat /proc/cpuinfo | grep "cpu cores" | sort -u + @cat /proc/cpuinfo | grep "physical id" | sort -u +endif + @echo "" +ifneq ($(shell which nvidia-smi 2>/dev/null),) + nvidia-smi -L + @echo "" +endif + @echo USECCACHE=$(USECCACHE) +ifeq ($(USECCACHE),1) + ccache --version | head -1 +endif + @echo "" + @echo GPUCC=$(GPUCC) +ifneq ($(GPUCC),) + $(GPUCC) --version +endif + @echo "" + @echo CXX=$(CXX) +ifneq ($(shell $(CXX) --version | grep ^clang),) + @echo $(CXX) -v + @$(CXX) -v |& egrep -v '(Found|multilib)' + @readelf -p .comment `$(CXX) -print-libgcc-file-name` |& grep 'GCC: (GNU)' | grep -v Warning | sort -u | awk '{print "GCC toolchain:",$$5}' +else + $(CXX) --version +endif + @echo "" + @echo FC=$(FC) + $(FC) --version + +#------------------------------------------------------------------------------- + +# Target: check (run the C++ test executable) +# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] 
+ifneq ($(GPUCC),) +check: runTest cmpFcheck cmpFGcheck +else +check: runTest cmpFcheck +endif + +# Target: runTest (run the C++ test executable runTest.exe) +runTest: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/runTest.exe + +# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +runCheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 + +# Target: runGcheck (run the CUDA standalone executable gcheck.exe, with a small number of events) +runGcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 + +# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +runFcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 + +# Target: runFGcheck (run the Fortran standalone executable - with CUDA MEs - fgcheck.exe, with a small number of events) +runFGcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 + +# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +cmpFcheck: all.$(TAG) + @echo + @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi + +# Target: cmpFGcheck (compare ME results from the CUDA and Fortran with CUDA MEs standalone executables, with a small number of events) +cmpFGcheck: all.$(TAG) + @echo + @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/CUDA) = $${me1}"; echo "Avg ME (F77/CUDA) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/CUDA) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/CUDA) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi + +# Target: memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) +memcheck: all.$(TAG) + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 + +#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 4d917f5637..3811013e24 
100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -1168,9 +1168,14 @@ def get_process_class_definitions(self, write=True): replace_dict['nincoming'] = nincoming replace_dict['noutcoming'] = nexternal - nincoming replace_dict['nbhel'] = self.matrix_elements[0].get_helicity_combinations() # number of helicity combinations - file = self.read_template_file(self.process_class_template) % replace_dict # HACK! ignore write=False case - file = '\n'.join( file.split('\n')[8:] ) # skip first 8 lines in process_class.inc (copyright) - return file +# file = self.read_template_file(self.process_class_template) % replace_dict # HACK! ignore write=False case +# file = '\n'.join( file.split('\n')[8:] ) # skip first 8 lines in process_class.inc (copyright) + if write: + file = self.read_template_file(self.process_class_template) % replace_dict + file = '\n'.join( file.split('\n')[8:] ) # skip first 8 lines in process_class.inc (copyright) + return file + else: + return replace_dict # AV - replace export_cpp.OneProcessExporterGPU method (fix CPPProcess.cc) def get_process_function_definitions(self, write=True): @@ -2097,3 +2102,77 @@ def generate_helas_call(self, argument): self.add_amplitude(argument.get_call_key(), call_function) #------------------------------------------------------------------------------------ +class PLUGIN_OneProcessExporterRwgt(PLUGIN_OneProcessExporter): + """A custom OneProcessExporter for the REX reweighting""" + + rwgt_template = 'gpu/rwgt_runner.inc' + + # ZW - rwgt functions + def get_rwgt_legs(self, process): + """Return string with particle ids and status in the REX std::pair format""" + return ",".join(["{%i,%i}" % (leg.get('state'), leg.get('id')) \ + for leg in process.get('legs')]).replace('0', '-1') + + def get_init_prts_vec(self, process): + """Return string with initial state particle ids for use in REX event sorting""" + prts = 
",".join(["\"%i\"" % leg.get('id') for leg in process.get('legs') if leg.get('state') == 0]) + return "{" + prts + "}" + + def get_fin_prts_vec(self, process): + """Return string with final state particle ids for use in REX event sorting""" + prts = ",".join(["\"%i\"" % leg.get('id') for leg in process.get('legs') if leg.get('state') == 1]) + return "{" + prts + "}" + + def get_rwgt_procMap(self, process): + """Return string with particle states and order in the REX procMap format""" + currState = False + retString = "thisProc{{\"-1\",{" + for leg in process.get('legs'): + if currState == leg.get('state'): + retString += "\"%i\"," % leg.get('id') + else: + currState = leg.get('state') + retString += "}},{\"1\",{\"%i,\"" % leg.get('id') + retString = retString[:-1] + "}}}" + return retString + + def get_proc_dir(self): + """Return process directory name for the current process""" + return "P%d_%s" % (self.process_number, self.process_name) + + def get_rwgt_runner(self): + """Return string to initialise the rwgtRunners in teawREX""" + return "%s::runner" % (self.get_proc_dir()) + + def get_rwgt_includes(self): + """Return string with the include directives for the REX reweighting""" + return "#include \"P%d_%s/rwgt_runner.cc\"" % (self.process_number, self.process_name) + + def edit_rwgt_runner(self): + """Create the rwgt_runner.cc file for the REX reweighting""" + ###misc.sprint('Entering PLUGIN_OneProcessExporterRwgt.edit_rwgt_runner') + # Create the rwgt_runner.cc file +# replace_dict = {} + replace_dict = super().get_process_class_definitions(write=False) + rwgt_runner = self.get_proc_dir() + self.rwgt_template + replace_dict['process_namespace'] = self.get_proc_dir() + replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() + replace_dict['init_prt_ids'] = self.get_init_prts_vec(self.matrix_elements[0].get('processes')[0]) + replace_dict['fin_prt_ids'] = self.get_fin_prts_vec(self.matrix_elements[0].get('processes')[0]) + 
replace_dict['process_event'] = self.get_rwgt_legs(self.matrix_elements[0].get('processes')[0]) + template = open(pjoin(self.template_path,'REX', 'rwgt_runner.inc'),'r').read() + ff = open(pjoin(self.path, 'rwgt_runner.cc'),'w') + ff.write(template % replace_dict) + ff.close() + + # ZW - override the PLUGIN method to generate the rwgt_runner.cc file as well + # note: also generating standard check_sa.cc and gcheck_sa.cu files, which + # are not used in the REX reweighting + def generate_process_files(self): + """Generate mgOnGpuConfig.h, CPPProcess.cc, CPPProcess.h, check_sa.cc, gXXX.cu links""" + misc.sprint('Entering RWGT_OneProcessExporter.generate_process_files') + super().generate_process_files() + misc.sprint('Generating rwgt_runner file') + self.edit_rwgt_runner() + misc.sprint('Finished generating rwgt files') + diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index b1a9b9aff8..f29c5aeaf4 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -339,3 +339,130 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): return out #------------------------------------------------------------------------------------ + +class RWGT_ProcessExporter(PLUGIN_ProcessExporter): + + oneprocessclass = model_handling.PLUGIN_OneProcessExporterRwgt + + rwgt_names = [] + proc_lines = [] + + + s = PLUGINDIR + '/madgraph/iolibs/template_files/' + from_template = {'.': [s+'.clang-format', s+'CMake/CMakeLists.txt', + s+'COPYRIGHT', s+'COPYING', s+'COPYING.LESSER' ], + 'CMake': [s+'CMake/Compilers.txt', s+'CMake/Platforms.txt', s+'CMake/Macros.txt'], + 'src': [s+'gpu/rambo.h', s+'read_slha.h', s+'read_slha.cc', + s+'gpu/mgOnGpuFptypes.h', s+'gpu/mgOnGpuCxtypes.h', s+'gpu/mgOnGpuVectors.h', + s+'CMake/src/CMakeLists.txt' ], + 'SubProcesses': [s+'gpu/nvtx.h', s+'gpu/timer.h', s+'gpu/timermap.h', + 
s+'gpu/ompnumthreads.h', s+'gpu/GpuRuntime.h', s+'gpu/GpuAbstraction.h', + s+'gpu/MemoryAccessHelpers.h', s+'gpu/MemoryAccessVectors.h', + s+'gpu/MemoryAccessMatrixElements.h', s+'gpu/MemoryAccessMomenta.h', + s+'gpu/MemoryAccessRandomNumbers.h', s+'gpu/MemoryAccessWeights.h', + s+'gpu/MemoryAccessAmplitudes.h', s+'gpu/MemoryAccessWavefunctions.h', + s+'gpu/MemoryAccessGs.h', s+'gpu/MemoryAccessCouplingsFixed.h', + s+'gpu/MemoryAccessNumerators.h', s+'gpu/MemoryAccessDenominators.h', + s+'gpu/EventStatistics.h', s+'gpu/CommonRandomNumbers.h', + s+'gpu/CrossSectionKernels.cc', s+'gpu/CrossSectionKernels.h', + s+'gpu/MatrixElementKernels.cc', s+'gpu/MatrixElementKernels.h', + s+'gpu/RamboSamplingKernels.cc', s+'gpu/RamboSamplingKernels.h', + s+'gpu/RandomNumberKernels.h', s+'gpu/CommonRandomNumberKernel.cc', + s+'gpu/CurandRandomNumberKernel.cc', s+'gpu/HiprandRandomNumberKernel.cc', + s+'gpu/Bridge.h', s+'gpu/BridgeKernels.cc', s+'gpu/BridgeKernels.h', + s+'gpu/fbridge.cc', s+'gpu/fbridge.inc', s+'gpu/fsampler.cc', s+'gpu/fsampler.inc', + s+'gpu/MadgraphTest.h', s+'gpu/runTest.cc', + s+'gpu/testmisc.cc', s+'gpu/testxxx_cc_ref.txt', + s+'gpu/perf.py', s+'gpu/profile.sh', + s+'CMake/SubProcesses/CMakeLists.txt'], + 'test': [s+'gpu/cudacpp_test.mk']} + + from_template['SubProcesses'].append(s+'REX/rwgt_instance.h') + from_template['SubProcesses'].append(s+'REX/REX.hpp') + from_template['SubProcesses'].append(s+'REX/teawREX.hpp') + + to_link_in_P = ['nvtx.h', 'timer.h', 'timermap.h', + 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', + 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', + 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', + 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', + 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', + 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', + 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', + 'EventStatistics.h', 'CommonRandomNumbers.h', + 'CrossSectionKernels.cc', 
'CrossSectionKernels.h', + 'MatrixElementKernels.cc', 'MatrixElementKernels.h', + 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', + 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', + 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', + 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', + 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', + 'MadgraphTest.h', 'runTest.cc', + 'testmisc.cc', 'testxxx_cc_ref.txt', + 'cudacpp.mk', # this is generated from a template in Subprocesses but we still link it in P1 + 'testxxx.cc', # this is generated from a template in Subprocesses but we still link it in P1 + 'MemoryBuffers.h', # this is generated from a template in Subprocesses but we still link it in P1 + 'MemoryAccessCouplings.h', # this is generated from a template in Subprocesses but we still link it in P1 + 'perf.py', 'profile.sh'] + + to_link_in_P.append('rwgt_instance.h') + to_link_in_P.append('REX.hpp') + to_link_in_P.append('teawREX.hpp') + + template_Sub_make = pjoin(PLUGINDIR, 'madgraph', 'iolibs', 'template_files','gpu','cudacpp_rex.mk') + + # def generate_subprocess_directory(self, subproc_group, fortran_model, me=None): + # misc.sprint('Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory)') + # misc.sprint(' type(subproc_group)=%s'%type(subproc_group)) # e.g. madgraph.core.helas_objects.HelasMatrixElement + # misc.sprint(' type(fortran_model)=%s'%type(fortran_model)) # e.g. madgraph.iolibs.helas_call_writers.GPUFOHelasCallWriter + # misc.sprint(' type(me)=%s me=%s'%(type(me) if me is not None else None, me)) # e.g. 
int + # return super().generate_subprocess_directory(subproc_group, fortran_model, me) + + def generate_subprocess_directory(self, matrix_element, cpp_helas_call_writer, + proc_number=None): + """Generate the Pxxxxx directory for a subprocess in C++ standalone, + including the necessary .h and .cc files""" + + + process_exporter_cpp = self.oneprocessclass(matrix_element,cpp_helas_call_writer) + + self.rwgt_names.append("P%d_%s" % (process_exporter_cpp.process_number, + process_exporter_cpp.process_name)) + + process_lines = "\n".join([process_exporter_cpp.get_process_info_lines(me) for me in \ + process_exporter_cpp.matrix_elements]) + self.proc_lines.append(process_lines) + + # Create the directory PN_xx_xxxxx in the specified path + dirpath = pjoin(self.dir_path, 'SubProcesses', "P%d_%s" % (process_exporter_cpp.process_number, + process_exporter_cpp.process_name)) + try: + os.mkdir(dirpath) + except os.error as error: + logger.warning(error.strerror + " " + dirpath) + + with misc.chdir(dirpath): + logger.info('Creating files in directory %s' % dirpath) + process_exporter_cpp.path = dirpath + # Create the process .h and .cc files + process_exporter_cpp.generate_process_files() + for file in self.to_link_in_P: + files.ln('../%s' % file) + return + + def export_driver(self): + replace_dict = {} + replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() + replace_dict['multiprocess_lines'] = "\n".join(self.proc_lines) + replace_dict['include_lines'] = '' + replace_dict['run_set'] = '' + for name in self.rwgt_names: + replace_dict['include_lines'] += '#include "%s/rwgt_runner.cc"\n' % name + replace_dict['run_set'] += '%s::runner,' % name + replace_dict['run_set'] = replace_dict['run_set'][:-1] + template_path = os.path.join( PLUGINDIR, 'madgraph', 'iolibs', 'template_files' ) + template = open(pjoin(template_path,'REX', 'rwgt_driver.inc'),'r').read() + ff = open(pjoin(self.dir_path, 'SubProcesses', 'rwgt_driver.cc'),'w') + ff.write(template % replace_dict) 
+ ff.close() + diff --git a/tools/REX/rwgt_driver.inc b/tools/REX/rwgt_driver.inc new file mode 120000 index 0000000000..77a39010fd --- /dev/null +++ b/tools/REX/rwgt_driver.inc @@ -0,0 +1 @@ +rwgt_driver.cc \ No newline at end of file diff --git a/tools/REX/rwgt_runner.inc b/tools/REX/rwgt_runner.inc new file mode 120000 index 0000000000..ff1267c3dc --- /dev/null +++ b/tools/REX/rwgt_runner.inc @@ -0,0 +1 @@ +rwgt_runner.cc \ No newline at end of file From e5a95d3fe576382c63adfd21e7342702b56bd6cc Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Wed, 6 Mar 2024 11:15:53 +0100 Subject: [PATCH 14/76] small modifications and added files, checking fbridge which currently does ntot compile(?) --- .gitmodules | 2 +- MG5aMC/mg5amcnlo | 2 +- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 2 + tools/REX/10simevs.lhe | 550 +++++ tools/REX/2diffevs.lhe | 400 ++++ tools/REX/2simevs.lhe | 407 ++++ tools/REX/REX.hpp | 17 +- tools/REX/rwgt_driver.cc | 3 +- tools/REX/rwgt_runner.cc | 4 +- tools/REX/teawREX.hpp | 4 +- tools/REX/tester.cpp | 62 + tools/REX/unweighted_events.lhe | 1870 +++++++++++++++++ 12 files changed, 3306 insertions(+), 17 deletions(-) create mode 100644 tools/REX/10simevs.lhe create mode 100644 tools/REX/2diffevs.lhe create mode 100644 tools/REX/2simevs.lhe create mode 100644 tools/REX/tester.cpp create mode 100644 tools/REX/unweighted_events.lhe diff --git a/.gitmodules b/.gitmodules index 997b366b8f..6fbb5110b6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "MG5aMC/mg5amcnlo"] path = MG5aMC/mg5amcnlo - url = https://github.com/zeniheisser/mg5amcnlo + url = https://github.com/zeniheisser/mg5amcnlo/ branch = rexCPP diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index f9f9579188..89822fa1a9 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit f9f95791884846ce82b5bf7997726222d8ffbe5e +Subproject commit 89822fa1a9ad49854794816a809ff828866773c3 diff --git 
a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index f29c5aeaf4..2d92b35dd0 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -451,6 +451,8 @@ def generate_subprocess_directory(self, matrix_element, cpp_helas_call_writer, return def export_driver(self): + misc.sprint("In export_driver") + misc.sprint("Current working directory is: %s" % self.dir_path) replace_dict = {} replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() replace_dict['multiprocess_lines'] = "\n".join(self.proc_lines) diff --git a/tools/REX/10simevs.lhe b/tools/REX/10simevs.lhe new file mode 100644 index 0000000000..23432065d5 --- /dev/null +++ b/tools/REX/10simevs.lhe @@ -0,0 +1,550 @@ + +
+ + +3.5.2 + + + t t~ > w+ b w- b~ +output +]]> + + +#********************************************************************* +# MadGraph/MadEvent * +# http://madgraph.hep.uiuc.edu * +# * +# proc_card.dat * +#********************************************************************* +# * +# This Files is generated by MADGRAPH 5 * +# * +# WARNING: This Files is generated for MADEVENT (compatibility issue)* +# This files is NOT a valid MG4 proc_card.dat * +# Running this in MG4 will NEVER reproduce the result of MG5* +# * +#********************************************************************* +#********************************************************************* +# Process(es) requested : mg2 input * +#********************************************************************* +# Begin PROCESS # This is TAG. Do not modify this line +g g > t t~ > w+ b w- b~ #Process +# Be carefull the coupling are here in MG5 convention + +end_coup # End the couplings input + +done # this tells MG there are no more procs +# End PROCESS # This is TAG. Do not modify this line +#********************************************************************* +# Model information * +#********************************************************************* +# Begin MODEL # This is TAG. Do not modify this line +sm +# End MODEL # This is TAG. Do not modify this line +#********************************************************************* +# Start multiparticle definitions * +#********************************************************************* +# Begin MULTIPARTICLES # This is TAG. Do not modify this line + +# End MULTIPARTICLES # This is TAG. Do not modify this line + + + + + +###################################################################### +## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +###################################################################### +## ## +## Width set on Auto will be computed following the information ## +## present in the decay.py files of the model. 
## +## See arXiv:1402.1178 for more details. ## +## ## +###################################################################### + +################################### +## INFORMATION FOR MASS +################################### +Block mass + 5 4.700000e+00 # MB + 6 1.730000e+02 # MT + 15 1.777000e+00 # MTA + 23 9.118800e+01 # MZ + 25 1.250000e+02 # MH +## Dependent parameters, given by model restrictions. +## Those values should be edited following the +## analytical expression. MG5 ignores those values +## but they are important for interfacing the output of MG5 +## to external program such as Pythia. + 1 0.000000e+00 # d : 0.0 + 2 0.000000e+00 # u : 0.0 + 3 0.000000e+00 # s : 0.0 + 4 0.000000e+00 # c : 0.0 + 11 0.000000e+00 # e- : 0.0 + 12 0.000000e+00 # ve : 0.0 + 13 0.000000e+00 # mu- : 0.0 + 14 0.000000e+00 # vm : 0.0 + 16 0.000000e+00 # vt : 0.0 + 21 0.000000e+00 # g : 0.0 + 22 0.000000e+00 # a : 0.0 + 24 8.041900e+01 # w+ : cmath.sqrt(MZ__exp__2/2. + cmath.sqrt(MZ__exp__4/4. - (aEW*cmath.pi*MZ__exp__2)/(Gf*sqrt__2))) + +################################### +## INFORMATION FOR SMINPUTS +################################### +Block sminputs + 1 1.325070e+02 # aEWM1 + 2 1.166390e-05 # Gf + 3 1.180000e-01 # aS (Note that Parameter not used if you use a PDF set) + +################################### +## INFORMATION FOR YUKAWA +################################### +Block yukawa + 5 4.700000e+00 # ymb + 6 1.730000e+02 # ymt + 15 1.777000e+00 # ymtau + +################################### +## INFORMATION FOR DECAY +################################### +DECAY 6 1.491500e+00 # WT +DECAY 23 2.441404e+00 # WZ +DECAY 24 2.047600e+00 # WW +DECAY 25 6.382339e-03 # WH +## Dependent parameters, given by model restrictions. +## Those values should be edited following the +## analytical expression. MG5 ignores those values +## but they are important for interfacing the output of MG5 +## to external program such as Pythia. 
+DECAY 1 0.000000e+00 # d : 0.0 +DECAY 2 0.000000e+00 # u : 0.0 +DECAY 3 0.000000e+00 # s : 0.0 +DECAY 4 0.000000e+00 # c : 0.0 +DECAY 5 0.000000e+00 # b : 0.0 +DECAY 11 0.000000e+00 # e- : 0.0 +DECAY 12 0.000000e+00 # ve : 0.0 +DECAY 13 0.000000e+00 # mu- : 0.0 +DECAY 14 0.000000e+00 # vm : 0.0 +DECAY 15 0.000000e+00 # ta- : 0.0 +DECAY 16 0.000000e+00 # vt : 0.0 +DECAY 21 0.000000e+00 # g : 0.0 +DECAY 22 0.000000e+00 # a : 0.0 + + +# Number of Events : 10 +# Integrated weight (pb) : 439.19338 + +
+ +2212 2212 6.500000e+03 6.500000e+03 0 0 247000 247000 -4 1 +4.391934e+02 3.661122e+00 4.391934e+02 1 +please cite 1405.0301 + + + 8 1 +4.3919338e+02 2.18409400e+02 7.54677100e-03 1.13637100e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.5753848612e+02 1.5753848612e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -4.2590181999e+02 4.2590181999e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 6 2 1 2 501 0 +1.2998184096e+02 -6.4884027876e+00 -2.9537542099e+02 3.6336687781e+02 1.6688759754e+02 0.0000e+00 0.0000e+00 + -6 2 1 2 0 502 -1.2998184096e+02 +6.4884027876e+00 +2.7012087117e+01 2.2007342830e+02 1.7540034961e+02 0.0000e+00 0.0000e+00 + 24 1 3 3 0 0 +5.0317013823e+00 -2.3598693140e+01 -1.3935351491e+02 1.6269245345e+02 8.0419002446e+01 0.0000e+00 -1.0000e+00 + 5 1 3 3 501 0 +1.2495013958e+02 +1.7110290353e+01 -1.5602190608e+02 2.0067442436e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 4 4 0 0 -2.8600069986e+01 +2.4574536812e+01 -3.9691506658e+01 9.7285679922e+01 8.0419002446e+01 0.0000e+00 -1.0000e+00 + -5 1 4 4 0 502 -1.0138177097e+02 -1.8086134024e+01 +6.6703593775e+01 1.2278774838e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.21669541E+03 +0 + 1 21 0.24236690E-01 0.21840939E+03 + 1 21 0.65523357E-01 0.21840939E+03 + 0.33953413E+04 + + + + 8 1 +4.3919338e+02 3.15887700e+02 7.54677100e-03 1.07761700e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +5.3238340901e+02 5.3238340901e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -2.1052403255e+02 2.1052403255e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -6 2 1 2 0 503 +6.6382935030e+01 +2.5548643690e+02 +3.7219413376e+01 3.1807282501e+02 1.7350799341e+02 0.0000e+00 0.0000e+00 + 6 2 1 2 501 0 -6.6382935030e+01 -2.5548643690e+02 +2.8463996308e+02 4.2483461655e+02 1.7258194961e+02 0.0000e+00 0.0000e+00 + 24 1 4 4 0 0 +1.6661005347e-01 -1.8051085807e+02 +2.4998404651e+02 
3.1865880986e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 4 4 501 0 -6.6549545084e+01 -7.4975578835e+01 +3.4655916570e+01 1.0617580669e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 +7.2402758048e+01 +1.6925425433e+02 -3.6729050251e+01 2.0421900271e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 3 3 0 503 -6.0198230171e+00 +8.6232182571e+01 +7.3948463627e+01 1.1385382230e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.31576070E+03 +0 + 1 21 0.81905139E-01 0.31588770E+03 + 1 21 0.32388313E-01 0.31588770E+03 + 0.11189986E+04 + + + + 7 1 +4.3919338e+02 2.51159400e+02 7.54677100e-03 1.11876800e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +8.2851295259e+01 8.2851295259e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -2.1984564692e+03 2.1984564692e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -6 2 1 2 0 502 -7.7080486205e+01 -1.0172807019e+02 -7.8449770191e+01 2.2872812745e+02 1.7283572612e+02 0.0000e+00 0.0000e+00 + 24 1 1 2 0 0 +5.9800337718e+00 +7.0764350150e+01 -4.1759378302e+02 4.3115558402e+02 8.0419002446e+01 0.0000e+00 1.0000e+00 + 5 1 1 2 501 0 +7.1100452433e+01 +3.0963720041e+01 -1.6195616207e+03 1.6214240530e+03 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 -4.7255780753e+01 +1.4691445127e+01 -3.8855826763e+01 1.0210748553e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 3 3 0 502 -2.9824705452e+01 -1.1641951532e+02 -3.9593943428e+01 1.2662064191e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.24154488E+03 +0 + 1 21 0.12746352E-01 0.25115937E+03 + 1 21 0.33822410E+00 0.25115937E+03 + 0.10034989E+03 + + + + 8 1 +4.3919338e+02 1.78714900e+02 7.54677100e-03 1.16958300e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +4.3091677531e+02 4.3091677531e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -1.0391889102e+02 1.0391889102e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -6 2 1 2 0 503 
-3.6067364068e+01 -1.4649928734e+01 +1.7471337281e+01 1.7742037393e+02 1.7221285172e+02 0.0000e+00 0.0000e+00 + 6 2 1 2 501 0 +3.6067364068e+01 +1.4649928734e+01 +3.0952654701e+02 3.5741529240e+02 1.7442342955e+02 0.0000e+00 0.0000e+00 + 24 1 4 4 0 0 +2.2734065795e+01 +3.3518972368e+01 +6.3307765789e+01 1.1007019769e+02 8.0419002446e+01 0.0000e+00 1.0000e+00 + 5 1 4 4 501 0 +1.3333298273e+01 -1.8869043633e+01 +2.4621878122e+02 2.4734509471e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 +3.4800320870e+01 +1.3510658921e+01 +4.0711502437e+01 9.7561481242e+01 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 3 3 0 503 -7.0867684937e+01 -2.8160587655e+01 -2.3240165156e+01 7.9858892692e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.17817321E+03 +0 + 1 21 0.66294888E-01 0.17871488E+03 + 1 21 0.15987522E-01 0.17871488E+03 + 0.73335693E+04 + + + + 8 1 +4.3919338e+02 2.04872300e+02 7.54677100e-03 1.14579500e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +3.7895944857e+02 3.7895944857e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -4.5403273068e+02 4.5403273068e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 6 2 1 2 501 0 -9.4158773160e+01 +4.2436191949e+01 -3.9984076151e+02 4.4927194413e+02 1.7693484753e+02 0.0000e+00 0.0000e+00 + -6 2 1 2 0 502 +9.4158773160e+01 -4.2436191949e+01 +3.2476747940e+02 3.8372023512e+02 1.7635361696e+02 0.0000e+00 0.0000e+00 + 24 1 3 3 0 0 -1.0155138529e+02 +3.1755977097e+01 -4.1062842852e+02 4.3174535111e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 3 3 501 0 +7.3926121347e+00 +1.0680214852e+01 +1.0787667014e+01 1.7526593025e+01 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 4 4 0 0 +2.8155867650e+01 -1.5152453482e+01 +2.9281756166e+02 3.0533864823e+02 8.0419002446e+01 0.0000e+00 1.0000e+00 + -5 1 4 4 0 502 +6.6002905510e+01 -2.7283738467e+01 +3.1949917737e+01 7.8381586893e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.20474671E+03 +0 + 1 21 0.58301453E-01 
0.20487227E+03 + 1 21 0.69851190E-01 0.20487227E+03 + 0.50816560E+03 + + + + 8 1 +4.3919338e+02 2.01015900e+02 7.54677100e-03 1.15150100e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +6.0319621128e+02 6.0319621128e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -9.8768974383e+01 9.8768974383e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -6 2 1 2 0 503 +1.4738675699e+01 +7.6352852286e+01 +4.6116481070e+01 2.0623803560e+02 1.8536561409e+02 0.0000e+00 0.0000e+00 + 6 2 1 2 501 0 -1.4738675699e+01 -7.6352852286e+01 +4.5831075582e+02 4.9572715006e+02 1.7219080054e+02 0.0000e+00 0.0000e+00 + 24 1 4 4 0 0 -4.3843112218e+00 -4.1059533654e+00 +3.8045368848e+02 3.8890655185e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 4 4 501 0 -1.0354364477e+01 -7.2246898921e+01 +7.7857067340e+01 1.0682059821e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 +2.7266458026e+01 +1.2364655742e+02 +4.6653600940e+01 1.5708502580e+02 8.0419002446e+01 0.0000e+00 1.0000e+00 + -5 1 3 3 0 503 -1.2527782328e+01 -4.7293705129e+01 -5.3711986978e-01 4.9153009803e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.19792528E+03 +0 + 1 21 0.92799415E-01 0.20101591E+03 + 1 21 0.15195227E-01 0.20101591E+03 + 0.37319721E+04 + + + + 8 1 +4.3919338e+02 1.74602100e+02 7.54677100e-03 1.17351100e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +9.1248808608e+02 9.1248808608e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -3.9355927787e+01 3.9355927787e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 6 2 1 2 501 0 -2.5269092142e+00 -7.0232873774e+00 +2.4558729161e+02 3.0046206483e+02 1.7294109624e+02 0.0000e+00 0.0000e+00 + -6 2 1 2 0 502 +2.5269092142e+00 +7.0232873774e+00 +6.2754486669e+02 6.5138194904e+02 1.7444246039e+02 0.0000e+00 0.0000e+00 + 24 1 3 3 0 0 +6.0339304741e+01 -1.2037996311e+01 +1.0278716341e+02 1.4428500258e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 3 3 501 0 
-6.2866213955e+01 +5.0147089339e+00 +1.4280012819e+02 1.5617706226e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 4 4 0 0 -2.3438702161e+01 -4.5120986152e+01 +2.2033674694e+02 2.4000161283e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 4 4 0 502 +2.5965611375e+01 +5.2144273529e+01 +4.0720811975e+02 4.1138033621e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.17422586E+03 +0 + 1 21 0.14038277E+00 0.17460207E+03 + 1 21 0.60547588E-02 0.17460207E+03 + 0.69618335E+04 + + + + 8 1 +4.3919338e+02 1.88007500e+02 7.54677100e-03 1.16157200e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +6.3582462508e+02 6.3582462508e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -1.6129828482e+02 1.6129828482e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -6 2 1 2 0 503 +4.8116344176e+01 -2.9059974330e+01 -8.6672370246e+01 2.0702396972e+02 1.7940780248e+02 0.0000e+00 0.0000e+00 + 6 2 1 2 501 0 -4.8116344176e+01 +2.9059974330e+01 +5.6119871050e+02 5.9009894018e+02 1.7353127052e+02 0.0000e+00 0.0000e+00 + 24 1 4 4 0 0 -1.6998242631e+01 +4.7411725194e+01 +1.5136221811e+02 1.7864643563e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 4 4 501 0 -3.1118101545e+01 -1.8351750864e+01 +4.0983649239e+02 4.1145250455e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 +3.9713907948e+01 -3.9675651300e+00 +2.4165830325e+01 9.2973863893e+01 8.0419002446e+01 0.0000e+00 -1.0000e+00 + -5 1 3 3 0 503 +8.4024362278e+00 -2.5092409200e+01 -1.1083820057e+02 1.1405010583e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.18659178E+03 +0 + 1 21 0.97819172E-01 0.18800751E+03 + 1 21 0.24815121E-01 0.18800751E+03 + 0.13327893E+04 + + + + 8 1 +4.3919338e+02 2.47273800e+02 7.54677100e-03 1.11506100e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +4.7922758970e+02 4.7922758970e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -1.2754705753e+02 1.2754705753e+02 0.0000000000e+00 
0.0000e+00 -1.0000e+00 + 6 2 1 2 501 0 +1.6471793564e+02 +6.3009040540e+01 +1.7155189565e+02 3.0095574765e+02 1.7332677701e+02 0.0000e+00 0.0000e+00 + -6 2 1 2 0 502 -1.6471793564e+02 -6.3009040540e+01 +1.8012863652e+02 3.0581889959e+02 1.7313791075e+02 0.0000e+00 0.0000e+00 + 24 1 3 3 0 0 +5.6303415524e+01 +9.2300656218e+01 +8.8725358462e+01 1.6133471705e+02 8.0419002446e+01 0.0000e+00 -1.0000e+00 + 5 1 3 3 501 0 +1.0841452012e+02 -2.9291615679e+01 +8.2826537186e+01 1.3962103059e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 4 4 0 0 -1.6187054837e+02 -9.3582557312e+01 +1.1528314764e+02 2.3391705698e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 4 4 0 502 -2.8473872739e+00 +3.0573516772e+01 +6.4845488879e+01 7.1901842605e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.24724066E+03 +0 + 1 21 0.73727321E-01 0.24727375E+03 + 1 21 0.19622624E-01 0.24727375E+03 + 0.38478950E+04 + + + + 8 1 +4.3919338e+02 1.82375300e+02 7.54677100e-03 1.16561400e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +3.2305192784e+02 3.2305192784e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -1.3439332851e+02 1.3439332851e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -6 2 1 2 0 503 +4.6712402287e+01 -3.3471733509e+01 -1.6591366530e+01 1.8312847738e+02 1.7308483560e+02 0.0000e+00 0.0000e+00 + 6 2 1 2 501 0 -4.6712402287e+01 +3.3471733509e+01 +2.0524996585e+02 2.7431677897e+02 1.7268393460e+02 0.0000e+00 0.0000e+00 + 24 1 4 4 0 0 -5.0429547514e+01 +6.7234938560e+01 +2.0963658148e+02 2.3974650878e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 4 4 501 0 +3.7171452269e+00 -3.3763205051e+01 -4.3866156262e+00 3.4570270185e+01 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 +2.0594294555e+01 +4.7013575059e+01 +5.4619595756e+00 9.5558621614e+01 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 3 3 0 503 +2.6118107732e+01 -8.0485308568e+01 -2.2053326106e+01 8.7569855767e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 
0.18228016E+03 +0 + 1 21 0.49700296E-01 0.18237534E+03 + 1 21 0.20675897E-01 0.18237534E+03 + 0.84041211E+04 + + +
diff --git a/tools/REX/2diffevs.lhe b/tools/REX/2diffevs.lhe new file mode 100644 index 0000000000..634129df21 --- /dev/null +++ b/tools/REX/2diffevs.lhe @@ -0,0 +1,400 @@ + +
+ + +3.5.2 + + + 3j +output +]]> + + +#********************************************************************* +# MadGraph/MadEvent * +# http://madgraph.hep.uiuc.edu * +# * +# proc_card.dat * +#********************************************************************* +# * +# This Files is generated by MADGRAPH 5 * +# * +# WARNING: This Files is generated for MADEVENT (compatibility issue)* +# This files is NOT a valid MG4 proc_card.dat * +# Running this in MG4 will NEVER reproduce the result of MG5* +# * +#********************************************************************* +#********************************************************************* +# Process(es) requested : mg2 input * +#********************************************************************* +# Begin PROCESS # This is TAG. Do not modify this line +p p > 3j #Process +# Be carefull the coupling are here in MG5 convention + +end_coup # End the couplings input + +done # this tells MG there are no more procs +# End PROCESS # This is TAG. Do not modify this line +#********************************************************************* +# Model information * +#********************************************************************* +# Begin MODEL # This is TAG. Do not modify this line +sm +# End MODEL # This is TAG. Do not modify this line +#********************************************************************* +# Start multiparticle definitions * +#********************************************************************* +# Begin MULTIPARTICLES # This is TAG. Do not modify this line + +# End MULTIPARTICLES # This is TAG. 
Do not modify this line + + + + + +###################################################################### +## PARAM_CARD AUTOMATICALY GENERATED BY MG5 #### +###################################################################### +################################### +## INFORMATION FOR MASS +################################### +BLOCK MASS # + 5 4.700000e+00 # mb + 6 1.730000e+02 # mt + 15 1.777000e+00 # mta + 23 9.118800e+01 # mz + 25 1.250000e+02 # mh + 1 0.000000e+00 # d : 0.0 + 2 0.000000e+00 # u : 0.0 + 3 0.000000e+00 # s : 0.0 + 4 0.000000e+00 # c : 0.0 + 11 0.000000e+00 # e- : 0.0 + 12 0.000000e+00 # ve : 0.0 + 13 0.000000e+00 # mu- : 0.0 + 14 0.000000e+00 # vm : 0.0 + 16 0.000000e+00 # vt : 0.0 + 21 0.000000e+00 # g : 0.0 + 22 0.000000e+00 # a : 0.0 + 24 8.041900e+01 # w+ : cmath.sqrt(mz__exp__2/2. + cmath.sqrt(mz__exp__4/4. - (aew*cmath.pi*mz__exp__2)/(gf*sqrt__2))) +################################### +## INFORMATION FOR SMINPUTS +################################### +BLOCK SMINPUTS # + 1 1.325070e+02 # aewm1 + 2 1.166390e-05 # gf + 3 1.300000e-01 # as (note that parameter not used if you use a pdf set) +################################### +## INFORMATION FOR YUKAWA +################################### +BLOCK YUKAWA # + 5 4.700000e+00 # ymb + 6 1.730000e+02 # ymt + 15 1.777000e+00 # ymtau +################################### +## INFORMATION FOR DECAY +################################### +DECAY 6 1.491500e+00 # wt +DECAY 23 2.441404e+00 # wz +DECAY 24 2.047600e+00 # ww +DECAY 25 6.382339e-03 # wh +DECAY 1 0.000000e+00 # d : 0.0 +DECAY 2 0.000000e+00 # u : 0.0 +DECAY 3 0.000000e+00 # s : 0.0 +DECAY 4 0.000000e+00 # c : 0.0 +DECAY 5 0.000000e+00 # b : 0.0 +DECAY 11 0.000000e+00 # e- : 0.0 +DECAY 12 0.000000e+00 # ve : 0.0 +DECAY 13 0.000000e+00 # mu- : 0.0 +DECAY 14 0.000000e+00 # vm : 0.0 +DECAY 15 0.000000e+00 # ta- : 0.0 +DECAY 16 0.000000e+00 # vt : 0.0 +DECAY 21 0.000000e+00 # g : 0.0 +DECAY 22 0.000000e+00 # a : 0.0 + + +# Number of Events : 100 +# 
Integrated weight (pb) : 66372287.22200001 + +
+ +2212 2212 6.500000e+03 6.500000e+03 0 0 247000 247000 -4 1 +6.637229e+07 1.268397e+06 6.637229e+07 1 +please cite 1405.0301 + + + 5 1 +6.6372287e+07 3.25558900e+01 7.54677100e-03 1.57144200e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.7974513959e+02 2.7974513959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -5.0115268359e+01 5.0115268359e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +1.1282244936e+00 +2.2858622638e+01 +3.8461797268e-02 2.2886480698e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +2.9194898468e+00 -4.2605139346e+01 -2.6389333299e+01 5.0200779193e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 503 -4.0477143403e+00 +1.9746516708e+01 +2.5598074273e+02 2.5677314806e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.32555892E+02 +0 + 1 21 0.43037713E-01 0.32555892E+02 + 1 21 0.77100414E-02 0.32555892E+02 + 0.65037882E+05 + + + + 5 1 +6.6372287e+07 3.05908400e+01 7.54677100e-03 1.59164800e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +4.5729905700e+02 4.5729905700e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -6.3253912877e+02 6.3253912877e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 +1.1420284484e+00 +2.8694844708e+01 +1.2159916921e+02 1.2494421273e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -2.2459074491e+01 -2.0815319355e+01 -6.3010778840e+02 6.3085141876e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 501 0 +2.1317046043e+01 -7.8795253530e+00 +3.3326854742e+02 3.3404255428e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.30590836E+02 +0 + 1 21 0.97313711E-01 0.30590836E+02 + 1 2 0.70353702E-01 0.30590836E+02 + 0.91658669E+02 + + +
diff --git a/tools/REX/2simevs.lhe b/tools/REX/2simevs.lhe new file mode 100644 index 0000000000..6fb52dd403 --- /dev/null +++ b/tools/REX/2simevs.lhe @@ -0,0 +1,407 @@ + +
+ + +3.5.2 + + + t t~ > w+ b w- b~ +output +]]> + + +#********************************************************************* +# MadGraph/MadEvent * +# http://madgraph.hep.uiuc.edu * +# * +# proc_card.dat * +#********************************************************************* +# * +# This Files is generated by MADGRAPH 5 * +# * +# WARNING: This Files is generated for MADEVENT (compatibility issue)* +# This files is NOT a valid MG4 proc_card.dat * +# Running this in MG4 will NEVER reproduce the result of MG5* +# * +#********************************************************************* +#********************************************************************* +# Process(es) requested : mg2 input * +#********************************************************************* +# Begin PROCESS # This is TAG. Do not modify this line +g g > t t~ > w+ b w- b~ #Process +# Be carefull the coupling are here in MG5 convention + +end_coup # End the couplings input + +done # this tells MG there are no more procs +# End PROCESS # This is TAG. Do not modify this line +#********************************************************************* +# Model information * +#********************************************************************* +# Begin MODEL # This is TAG. Do not modify this line +sm +# End MODEL # This is TAG. Do not modify this line +#********************************************************************* +# Start multiparticle definitions * +#********************************************************************* +# Begin MULTIPARTICLES # This is TAG. Do not modify this line + +# End MULTIPARTICLES # This is TAG. Do not modify this line + + + + + +###################################################################### +## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +###################################################################### +## ## +## Width set on Auto will be computed following the information ## +## present in the decay.py files of the model. 
## +## See arXiv:1402.1178 for more details. ## +## ## +###################################################################### + +################################### +## INFORMATION FOR MASS +################################### +Block mass + 5 4.700000e+00 # MB + 6 1.730000e+02 # MT + 15 1.777000e+00 # MTA + 23 9.118800e+01 # MZ + 25 1.250000e+02 # MH +## Dependent parameters, given by model restrictions. +## Those values should be edited following the +## analytical expression. MG5 ignores those values +## but they are important for interfacing the output of MG5 +## to external program such as Pythia. + 1 0.000000e+00 # d : 0.0 + 2 0.000000e+00 # u : 0.0 + 3 0.000000e+00 # s : 0.0 + 4 0.000000e+00 # c : 0.0 + 11 0.000000e+00 # e- : 0.0 + 12 0.000000e+00 # ve : 0.0 + 13 0.000000e+00 # mu- : 0.0 + 14 0.000000e+00 # vm : 0.0 + 16 0.000000e+00 # vt : 0.0 + 21 0.000000e+00 # g : 0.0 + 22 0.000000e+00 # a : 0.0 + 24 8.041900e+01 # w+ : cmath.sqrt(MZ__exp__2/2. + cmath.sqrt(MZ__exp__4/4. - (aEW*cmath.pi*MZ__exp__2)/(Gf*sqrt__2))) + +################################### +## INFORMATION FOR SMINPUTS +################################### +Block sminputs + 1 1.325070e+02 # aEWM1 + 2 1.166390e-05 # Gf + 3 1.180000e-01 # aS (Note that Parameter not used if you use a PDF set) + +################################### +## INFORMATION FOR YUKAWA +################################### +Block yukawa + 5 4.700000e+00 # ymb + 6 1.730000e+02 # ymt + 15 1.777000e+00 # ymtau + +################################### +## INFORMATION FOR DECAY +################################### +DECAY 6 1.491500e+00 # WT +DECAY 23 2.441404e+00 # WZ +DECAY 24 2.047600e+00 # WW +DECAY 25 6.382339e-03 # WH +## Dependent parameters, given by model restrictions. +## Those values should be edited following the +## analytical expression. MG5 ignores those values +## but they are important for interfacing the output of MG5 +## to external program such as Pythia. 
+DECAY 1 0.000000e+00 # d : 0.0 +DECAY 2 0.000000e+00 # u : 0.0 +DECAY 3 0.000000e+00 # s : 0.0 +DECAY 4 0.000000e+00 # c : 0.0 +DECAY 5 0.000000e+00 # b : 0.0 +DECAY 11 0.000000e+00 # e- : 0.0 +DECAY 12 0.000000e+00 # ve : 0.0 +DECAY 13 0.000000e+00 # mu- : 0.0 +DECAY 14 0.000000e+00 # vm : 0.0 +DECAY 15 0.000000e+00 # ta- : 0.0 +DECAY 16 0.000000e+00 # vt : 0.0 +DECAY 21 0.000000e+00 # g : 0.0 +DECAY 22 0.000000e+00 # a : 0.0 + + +# Number of Events : 2 +# Integrated weight (pb) : 439.19338 + +
+ +2212 2212 6.500000e+03 6.500000e+03 0 0 247000 247000 -4 1 +4.391934e+02 3.661122e+00 4.391934e+02 1 +please cite 1405.0301 + + + 8 1 +4.3919338e+02 2.18409400e+02 7.54677100e-03 1.13637100e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.5753848612e+02 1.5753848612e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -4.2590181999e+02 4.2590181999e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 6 2 1 2 501 0 +1.2998184096e+02 -6.4884027876e+00 -2.9537542099e+02 3.6336687781e+02 1.6688759754e+02 0.0000e+00 0.0000e+00 + -6 2 1 2 0 502 -1.2998184096e+02 +6.4884027876e+00 +2.7012087117e+01 2.2007342830e+02 1.7540034961e+02 0.0000e+00 0.0000e+00 + 24 1 3 3 0 0 +5.0317013823e+00 -2.3598693140e+01 -1.3935351491e+02 1.6269245345e+02 8.0419002446e+01 0.0000e+00 -1.0000e+00 + 5 1 3 3 501 0 +1.2495013958e+02 +1.7110290353e+01 -1.5602190608e+02 2.0067442436e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 4 4 0 0 -2.8600069986e+01 +2.4574536812e+01 -3.9691506658e+01 9.7285679922e+01 8.0419002446e+01 0.0000e+00 -1.0000e+00 + -5 1 4 4 0 502 -1.0138177097e+02 -1.8086134024e+01 +6.6703593775e+01 1.2278774838e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.21669541E+03 +0 + 1 21 0.24236690E-01 0.21840939E+03 + 1 21 0.65523357E-01 0.21840939E+03 + 0.33953413E+04 + + + + 8 1 +4.3919338e+02 3.15887700e+02 7.54677100e-03 1.07761700e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +5.3238340901e+02 5.3238340901e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -2.1052403255e+02 2.1052403255e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -6 2 1 2 0 503 +6.6382935030e+01 +2.5548643690e+02 +3.7219413376e+01 3.1807282501e+02 1.7350799341e+02 0.0000e+00 0.0000e+00 + 6 2 1 2 501 0 -6.6382935030e+01 -2.5548643690e+02 +2.8463996308e+02 4.2483461655e+02 1.7258194961e+02 0.0000e+00 0.0000e+00 + 24 1 4 4 0 0 +1.6661005347e-01 -1.8051085807e+02 +2.4998404651e+02 
3.1865880986e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + 5 1 4 4 501 0 -6.6549545084e+01 -7.4975578835e+01 +3.4655916570e+01 1.0617580669e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 + -24 1 3 3 0 0 +7.2402758048e+01 +1.6925425433e+02 -3.6729050251e+01 2.0421900271e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 + -5 1 3 3 0 503 -6.0198230171e+00 +8.6232182571e+01 +7.3948463627e+01 1.1385382230e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 + + 2 0.31576070E+03 +0 + 1 21 0.81905139E-01 0.31588770E+03 + 1 21 0.32388313E-01 0.31588770E+03 + 0.11189986E+04 + + +
diff --git a/tools/REX/REX.hpp b/tools/REX/REX.hpp index c97f3e6a27..bbf7596c2f 100644 --- a/tools/REX/REX.hpp +++ b/tools/REX/REX.hpp @@ -1294,10 +1294,10 @@ namespace REX return content; } lhePrt(){ return; } - lhePrt( std::pair prtInfo ){ - status = std::to_string( prtInfo.first ); - pdg = std::to_string( prtInfo.second ); - } + // lhePrt( std::pair prtInfo ){ + // status = std::to_string( prtInfo.first ); + // pdg = std::to_string( prtInfo.second ); + // } lhePrt( std::pair& prtInfo ){ status = std::to_string( prtInfo.first ); pdg = std::to_string( prtInfo.second ); @@ -1378,9 +1378,9 @@ namespace REX return modStat; } event(){ return; } - event( std::vector> prtInfo ){ + event( std::vector>& prtInfo ){ header.setNprt( std::to_string( prtInfo.size() ) ); - for( auto prt : prtInfo ){ + for( auto& prt : prtInfo ){ prts.push_back( std::make_shared( prt ) ); } } @@ -3447,15 +3447,14 @@ namespace REX template std::shared_ptr> vectorFlat( std::vector>> vecVec ) { - if( vecVec.size() == relProcs.size() ) continue; - else throw std::range_error("vectorFlat: input vector size does not match number of subprocesses"); + if( vecVec.size() != relProcs.size() ) throw std::range_error("vectorFlat: input vector size does not match number of subprocesses"); for( size_t k = 0 ; k < vecVec.size() ; ++k){ if( vecVec[k]->size() == relProcs[k]->size() ) continue; else throw std::range_error("vectorFlat: input vector size does not match number of events for subprocess"); } auto flatVec = std::make_shared>(relProcs[0]->size()); for( size_t k = 0 ; k < relProcs.size() ; ++k ){ - currInd = 0; + size_t currInd = 0; for( size_t j = 0 ; j < relProcs[k]->size() ; ++j ){ if( relProcs[k]->at(j) ){ flatVec->at(currInd) = vecVec[k]->at(currInd); diff --git a/tools/REX/rwgt_driver.cc b/tools/REX/rwgt_driver.cc index 57838a2dd0..f4c6ab927f 100644 --- a/tools/REX/rwgt_driver.cc +++ b/tools/REX/rwgt_driver.cc @@ -15,7 +15,7 @@ #include "rwgt_instance.h" #include #include -//%(include_lines)s 
+%(include_lines)s int usage( char* argv0, int ret = 1 ) { @@ -89,7 +89,6 @@ int main( int argc, char** argv ){ // ZW : include rwgt_instances(s) -//%(rwgt_runners)s std::vector runSet = {%(run_set)s}; std::vector runSet; diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index a770bf69aa..689daabcdd 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -95,8 +95,8 @@ namespace %(process_namespace)s{ }; std::shared_ptr> thisProcSort( std::string_view& status, std::vector& arguments ){ - std::vector initPrts = %(init_prt_ids)s - std::vector finPrts = %(fin_prt_ids)s + std::vector initPrts = %(init_prt_ids)s; + std::vector finPrts = %(fin_prt_ids)s; // std::vector initPrts = {"-1"}; // std::vector finPrts = {"1"}; if( status == "-1" ){ diff --git a/tools/REX/teawREX.hpp b/tools/REX/teawREX.hpp index e6b2c5f1e3..2c3c7ec7d1 100644 --- a/tools/REX/teawREX.hpp +++ b/tools/REX/teawREX.hpp @@ -450,7 +450,7 @@ namespace REX::teaw initMEs = {}; for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) { - auto ins = meEvals[eventFile.subProcs[k]]( *(momenta[k]), *(gS[k]) ); + auto ins = meEvals[*(eventFile.subProcs[k]->process)]( *(momenta[k]), *(gS[k]) ); initMEs.push_back( std::make_shared>( ins->begin(), ins->begin() + wgts[k]->size() ) ); } //auto ins = meEval( *(momenta[0]), *(gS[0]) ); @@ -487,7 +487,7 @@ namespace REX::teaw } template void setNormWgts(Args&&... args){ - if( !oneME() ){ setMEs(args); } + if( !oneME() ){ setMEs(args...); } //if( initMEs->size() != wgts[0]->size() ) // throw std::runtime_error( "Inconsistent number of events and event weights." 
); for( auto k = 0; k < initMEs.size() ; ++k ){ diff --git a/tools/REX/tester.cpp b/tools/REX/tester.cpp new file mode 100644 index 0000000000..d7d8493c25 --- /dev/null +++ b/tools/REX/tester.cpp @@ -0,0 +1,62 @@ +#include "teawREX.hpp" +#include +#include + +std::shared_ptr> meEval( std::vector& x, std::vector& y){ + int random = rand() % 10; + if( random == 0 ){ random = 11; } + auto thisIsIt = std::make_shared>( y.size(), random ); + return thisIsIt; +} + +std::shared_ptr> sortFunc(std::vector arguments){ + return REX::stoiSort(arguments); +} + +std::shared_ptr> sorterFunc(std::string_view dummy, std::vector arguments){ + return REX::stoiSort(arguments); +} + +int main( int argc, char* argv[] ){ + std::string lheFilePath; + + // READ COMMAND LINE ARGUMENTS + for( int arg = 0; arg < argc; arg++ ) + { + auto currArg = std::string( argv[arg] ); + if( currArg.substr(0,9) == "--lhefile" || currArg.substr(0,4) == "-lhe" ) + { + lheFilePath = currArg.substr( currArg.find( "=" ) + 1 ); + } + } + + + std::string currPath = argv[0]; + auto sembler = std::function>(std::vector)>(sortFunc); + auto sembler2 = std::function>(std::string_view, std::vector)>(sorterFunc); + auto lheFile = REX::filePuller(lheFilePath); + //std::cout << lheFile->substr(0, 1) << "\n"; + //std::cout << bool(lheFile->compare(0, 1, "<")) << "\n"; + //std::cout << lheFile->substr(1968, 1999 - 1968) << "\n"; + auto parseLhe = REX::lheNode(*lheFile); + //std::cout << *parseLhe.nodeWriter() << "\n"; + auto treeMan = parseLhe.getTree(); + //std::cout << parseLhe.getChildren().size() << " & " << parseLhe.getEvents().size() << " & " << treeMan.getChildren()->size() << "\n"; + auto proceses = REX::lheReOrder(parseLhe, {"-1", "1", "2"} ); + auto processes2 = REX::lheEvReOrder(parseLhe, {"-1", "1", "2"} ); + //std::cout << proceses.size() << " & " << processes2.size() << "\n"; + bool comp = REX::evProcComp( *parseLhe.getEvents()[0], *parseLhe.getEvents()[1], {"-1", "1"} ); + if( comp ){ std::cout << "true\n"; } 
+ else{ std::cout << "false\n"; } + auto evlist = REX::evProcessPull( parseLhe, {"-1", "1"} ); + //auto evsVals = lheValDoubles(parseLhe); + auto evsVals = lheValDoubles(parseLhe, sembler2); + int siz = 0; + for( auto& ev : *evsVals ){ + siz += ev->size(); + } + std::cout << evsVals->size() << "\n"; + std::cout << siz << "\n"; + return 0; + +} \ No newline at end of file diff --git a/tools/REX/unweighted_events.lhe b/tools/REX/unweighted_events.lhe new file mode 100644 index 0000000000..6b05b56584 --- /dev/null +++ b/tools/REX/unweighted_events.lhe @@ -0,0 +1,1870 @@ + +
+ + +3.5.2 + + + 3j +output +]]> + + +#********************************************************************* +# MadGraph/MadEvent * +# http://madgraph.hep.uiuc.edu * +# * +# proc_card.dat * +#********************************************************************* +# * +# This Files is generated by MADGRAPH 5 * +# * +# WARNING: This Files is generated for MADEVENT (compatibility issue)* +# This files is NOT a valid MG4 proc_card.dat * +# Running this in MG4 will NEVER reproduce the result of MG5* +# * +#********************************************************************* +#********************************************************************* +# Process(es) requested : mg2 input * +#********************************************************************* +# Begin PROCESS # This is TAG. Do not modify this line +p p > 3j #Process +# Be carefull the coupling are here in MG5 convention + +end_coup # End the couplings input + +done # this tells MG there are no more procs +# End PROCESS # This is TAG. Do not modify this line +#********************************************************************* +# Model information * +#********************************************************************* +# Begin MODEL # This is TAG. Do not modify this line +sm +# End MODEL # This is TAG. Do not modify this line +#********************************************************************* +# Start multiparticle definitions * +#********************************************************************* +# Begin MULTIPARTICLES # This is TAG. Do not modify this line + +# End MULTIPARTICLES # This is TAG. 
Do not modify this line + + + + + +###################################################################### +## PARAM_CARD AUTOMATICALY GENERATED BY MG5 #### +###################################################################### +################################### +## INFORMATION FOR MASS +################################### +BLOCK MASS # + 5 4.700000e+00 # mb + 6 1.730000e+02 # mt + 15 1.777000e+00 # mta + 23 9.118800e+01 # mz + 25 1.250000e+02 # mh + 1 0.000000e+00 # d : 0.0 + 2 0.000000e+00 # u : 0.0 + 3 0.000000e+00 # s : 0.0 + 4 0.000000e+00 # c : 0.0 + 11 0.000000e+00 # e- : 0.0 + 12 0.000000e+00 # ve : 0.0 + 13 0.000000e+00 # mu- : 0.0 + 14 0.000000e+00 # vm : 0.0 + 16 0.000000e+00 # vt : 0.0 + 21 0.000000e+00 # g : 0.0 + 22 0.000000e+00 # a : 0.0 + 24 8.041900e+01 # w+ : cmath.sqrt(mz__exp__2/2. + cmath.sqrt(mz__exp__4/4. - (aew*cmath.pi*mz__exp__2)/(gf*sqrt__2))) +################################### +## INFORMATION FOR SMINPUTS +################################### +BLOCK SMINPUTS # + 1 1.325070e+02 # aewm1 + 2 1.166390e-05 # gf + 3 1.300000e-01 # as (note that parameter not used if you use a pdf set) +################################### +## INFORMATION FOR YUKAWA +################################### +BLOCK YUKAWA # + 5 4.700000e+00 # ymb + 6 1.730000e+02 # ymt + 15 1.777000e+00 # ymtau +################################### +## INFORMATION FOR DECAY +################################### +DECAY 6 1.491500e+00 # wt +DECAY 23 2.441404e+00 # wz +DECAY 24 2.047600e+00 # ww +DECAY 25 6.382339e-03 # wh +DECAY 1 0.000000e+00 # d : 0.0 +DECAY 2 0.000000e+00 # u : 0.0 +DECAY 3 0.000000e+00 # s : 0.0 +DECAY 4 0.000000e+00 # c : 0.0 +DECAY 5 0.000000e+00 # b : 0.0 +DECAY 11 0.000000e+00 # e- : 0.0 +DECAY 12 0.000000e+00 # ve : 0.0 +DECAY 13 0.000000e+00 # mu- : 0.0 +DECAY 14 0.000000e+00 # vm : 0.0 +DECAY 15 0.000000e+00 # ta- : 0.0 +DECAY 16 0.000000e+00 # vt : 0.0 +DECAY 21 0.000000e+00 # g : 0.0 +DECAY 22 0.000000e+00 # a : 0.0 + + +# Number of Events : 100 +# 
Integrated weight (pb) : 66372287.22200001 + +
+ +2212 2212 6.500000e+03 6.500000e+03 0 0 247000 247000 -4 1 +6.637229e+07 1.268397e+06 6.637229e+07 1 +please cite 1405.0301 + + + 5 1 +6.6372287e+07 4.60140800e+01 7.54677100e-03 1.46810800e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.8788806474e+02 1.8788806474e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -3.0556910363e+01 3.0556910363e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -1.0298827890e+01 -4.1053633424e+01 +8.3051244550e+01 9.3214676391e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 +1.7276524932e+01 -1.2156784273e+01 -1.1495329061e+01 2.4050120744e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 -6.9776970424e+00 +5.3210417698e+01 +8.5775238884e+01 1.0118017797e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.46014081E+02 +0 + 1 21 0.28905856E-01 0.46014081E+02 + 1 21 0.47010632E-02 0.46014081E+02 + 0.31830845E+06 + + + + 5 1 +6.6372287e+07 3.25558900e+01 7.54677100e-03 1.57144200e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.7974513959e+02 2.7974513959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -5.0115268359e+01 5.0115268359e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +1.1282244936e+00 +2.2858622638e+01 +3.8461797268e-02 2.2886480698e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +2.9194898468e+00 -4.2605139346e+01 -2.6389333299e+01 5.0200779193e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 503 -4.0477143403e+00 +1.9746516708e+01 +2.5598074273e+02 2.5677314806e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.32555892E+02 +0 + 1 21 0.43037713E-01 0.32555892E+02 + 1 21 0.77100414E-02 0.32555892E+02 + 0.65037882E+05 + + + + 5 1 +6.6372287e+07 3.05908400e+01 7.54677100e-03 1.59164800e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +4.5729905700e+02 4.5729905700e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 
+ 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -6.3253912877e+02 6.3253912877e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 +1.1420284484e+00 +2.8694844708e+01 +1.2159916921e+02 1.2494421273e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -2.2459074491e+01 -2.0815319355e+01 -6.3010778840e+02 6.3085141876e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 501 0 +2.1317046043e+01 -7.8795253530e+00 +3.3326854742e+02 3.3404255428e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.30590836E+02 +0 + 1 21 0.97313711E-01 0.30590836E+02 + 1 2 0.70353702E-01 0.30590836E+02 + 0.91658669E+02 + + + + 5 1 +6.6372287e+07 1.24970000e+02 7.54677100e-03 1.23511600e-01 + 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +6.4054339688e+02 6.4054339688e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -3.3928351011e+01 3.3928351011e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 505 -3.6546574781e+01 +7.3293152180e+00 +5.3085336864e+01 6.4864658942e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 503 -8.4789922053e+01 +1.0871076160e+01 +6.9212770934e+01 1.0999053977e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 +1.2133649683e+02 -1.8200391378e+01 +4.8431693807e+02 4.9961654918e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.12497005E+03 +0 + 1 21 0.98545129E-01 0.12497005E+03 + 1 21 0.52197468E-02 0.12497005E+03 + 0.21698561E+05 + + + + 5 1 +6.6372287e+07 2.09917500e+01 7.54677100e-03 1.72629600e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.9393491974e+01 2.9393491974e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -5.0612634540e+01 5.0612634540e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -2.2361493101e+01 -8.0134576492e+00 -2.5339678876e+01 3.4732566890e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 +5.3440837509e+00 +2.0304167068e+01 
+1.0307030697e+01 2.3389170854e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 503 +1.7017409350e+01 -1.2290709419e+01 -6.1864943863e+00 2.1884388769e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.20991755E+02 +0 + 1 21 0.45220758E-02 0.20991755E+02 + 1 21 0.77865590E-02 0.20991755E+02 + 0.28846636E+07 + + + + 5 1 +6.6372287e+07 2.01883800e+01 7.54677100e-03 1.74160800e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.9616331394e+01 2.9616331394e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -2.1809592212e+02 2.1809592212e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +1.4332017667e+01 -1.5898231494e+01 -1.1283261663e+02 1.1484493837e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 503 +5.5127621513e+00 +1.9607065270e+01 -9.9531289229e+01 1.0159382408e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 -1.9844779818e+01 -3.7088337755e+00 +2.3884315130e+01 3.1273491063e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.20188381E+02 +0 + 1 21 0.45563588E-02 0.20188381E+02 + 1 21 0.33553218E-01 0.20188381E+02 + 0.23199633E+06 + + + + 5 1 +6.6372287e+07 2.83114100e+01 7.54677100e-03 1.61754100e-01 + -3 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +2.8261154183e+01 2.8261154183e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -6.5070264344e+01 6.5070264344e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +7.3144338996e+00 +3.7539358060e+01 -8.3663539266e+00 3.9149715515e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -1.5538451858e+01 -1.6013356486e+01 -1.8894895213e+01 2.9238470159e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -3 1 1 2 0 504 +8.2240179584e+00 -2.1526001574e+01 -9.5478610208e+00 2.4943232854e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.28311412E+02 +0 + 1 21 0.10010810E-01 0.28311412E+02 + 1 -3 0.43478699E-02 0.28311412E+02 + 0.75606750E+05 + + + + 5 1 
+6.6372287e+07 2.50484100e+01 7.54677100e-03 1.66030800e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +4.5278855952e+02 4.5278855952e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.3454632319e+00 3.3454632319e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 505 -1.5102249073e+01 -2.7392413109e+01 +1.7894067235e+02 1.8165402953e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 +2.1057931191e+01 +3.9670307239e+00 +9.9776507011e+01 1.0205158083e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 -5.9556821180e+00 +2.3425382385e+01 +1.7072591693e+02 1.7242841240e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.25048406E+02 +0 + 1 21 0.69659730E-01 0.25048406E+02 + 1 21 0.51468701E-03 0.25048406E+02 + 0.16161844E+07 + + + + 5 1 +6.6372287e+07 6.54738600e+01 7.54677100e-03 1.37619800e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +2.6663794394e+01 2.6663794394e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -2.5265738923e+02 2.5265738923e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 -7.6137868107e+00 +4.2439462980e+01 -1.6255497692e+02 1.6817609310e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -1.7587948234e+01 +1.0621679064e+01 -4.5177420050e+01 4.9630185085e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 +2.5201735045e+01 -5.3061142044e+01 -1.8261197867e+01 6.1514905444e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.65473858E+02 +0 + 1 21 0.41021221E-02 0.65473858E+02 + 1 1 0.38870368E-01 0.65473858E+02 + 0.41073273E+05 + + + + 5 1 +6.6372287e+07 4.71053000e+01 7.54677100e-03 1.46161100e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.0723487937e+02 1.0723487937e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.2434583342e+02 1.2434583342e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 
504 -1.5074548460e+01 +4.4668996332e+01 +7.0907382043e+01 8.5149386802e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 -3.7446327852e+01 -2.8577640944e+01 -7.7213750461e+01 9.0448174619e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 +5.2520876312e+01 -1.6091355388e+01 -1.0804585631e+01 5.5983151371e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.47105297E+02 +0 + 1 21 0.16497674E-01 0.47105297E+02 + 1 21 0.19130128E-01 0.47105297E+02 + 0.81247298E+05 + + + + 5 1 +6.6372287e+07 4.77488600e+01 7.54677100e-03 1.45787600e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +1.3351097238e+02 1.3351097238e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -2.1959914093e+03 2.1959914093e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -2.6534412892e+01 +2.0887502154e+01 -2.0204850067e+03 2.0207671872e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +2.7723341226e+01 -6.8071401227e+01 -1.6302498162e+02 1.7882797305e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 504 -1.1889283334e+00 +4.7183899072e+01 +1.2102955134e+02 1.2990722143e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.47748865E+02 +0 + 1 21 0.20540149E-01 0.47748865E+02 + 1 21 0.33784484E+00 0.47748865E+02 + 0.69049208E+02 + + + + 5 1 +6.6372287e+07 5.17648700e+01 7.54677100e-03 1.43604800e-01 + -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +7.0572435077e+02 7.0572435077e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -8.6069634546e+00 8.6069634546e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +2.3775888591e+01 -5.0832360721e+00 +1.9988986553e+01 3.1475256166e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +2.1064391322e+01 -6.4983212153e+00 +5.8809348241e+01 6.2805065090e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -2 1 1 2 0 502 -4.4840279913e+01 +1.1581557287e+01 +6.1831905252e+02 
6.2005099297e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.51764867E+02 +0 + 1 21 0.13241488E-02 0.51764867E+02 + 1 -2 0.10857293E+00 0.51764867E+02 + 0.12387408E+05 + + + + 5 1 +6.6372287e+07 2.68215700e+01 7.54677100e-03 1.63613700e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +4.7995183998e+01 4.7995183998e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -4.6755507222e+02 4.6755507222e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 -5.6572765501e+00 -2.1816941248e+01 +1.1968309353e+01 2.5519093482e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -2.3663698968e+01 +1.2778475361e+01 -4.5222293332e+02 4.5302189959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 503 +2.9320975518e+01 +9.0384658865e+00 +2.0694735751e+01 3.7009263148e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.26821571E+02 +0 + 1 21 0.73838749E-02 0.26821571E+02 + 1 21 0.71931546E-01 0.26821571E+02 + 0.24837378E+05 + + + + 5 1 +6.6372287e+07 4.31543000e+01 7.54677100e-03 1.48620400e-01 + 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +7.1563261884e+02 7.1563261884e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.4644760996e+01 1.4644760996e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +1.2645094462e+01 -4.3267730831e+01 +6.6587442685e+02 6.6739849211e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 505 -1.7724111495e+01 +1.5186816241e+01 +1.2556432303e+01 2.6503726304e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 503 +5.0790170329e+00 +2.8080914591e+01 +2.2556998696e+01 3.6375161422e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.43154296E+02 +0 + 1 21 0.11009730E+00 0.43154296E+02 + 1 21 0.22530407E-02 0.43154296E+02 + 0.66560154E+05 + + + + 5 1 +6.6372287e+07 4.37774800e+01 7.54677100e-03 1.48212100e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.6494856549e+02 
2.6494856549e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -4.4840936233e+01 4.4840936233e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 -2.3919572741e+01 -2.1836315356e+01 -2.8033165864e+01 4.2834904188e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +2.0968394452e+01 -2.8904789122e+01 +4.4445065176e+01 5.7013368771e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 +2.9511782884e+00 +5.0741104479e+01 +2.0369572994e+02 2.0994122876e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.43777479E+02 +0 + 1 21 0.40761317E-01 0.43777479E+02 + 1 21 0.68986058E-02 0.43777479E+02 + 0.88070658E+05 + + + + 5 1 +6.6372287e+07 3.19042100e+01 7.54677100e-03 1.57794600e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +3.5412150098e+01 3.5412150098e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -7.5668427371e+02 7.5668427371e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 +4.2078004819e+00 -3.1373137318e+01 -7.1649764593e+02 7.1719652534e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 +1.7730843460e+01 -1.5141321578e+01 -1.1865670592e+00 2.3346313849e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 503 -2.1938643942e+01 +4.6514458896e+01 -3.5879106177e+00 5.1553584618e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.31904206E+02 +0 + 1 21 0.54480234E-02 0.31904206E+02 + 1 21 0.11641296E+00 0.31904206E+02 + 0.14302972E+05 + + + + 5 1 +6.6372287e+07 2.62752100e+01 7.54677100e-03 1.64333300e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +7.3680834147e+01 7.3680834147e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -4.3076387169e+02 4.3076387169e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +5.3624128570e+00 +1.9370699338e+01 -1.8582414279e+02 1.8690797605e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 
504 502 +1.5600742238e+01 -2.1293512835e+01 +7.0284358620e+01 7.5077878991e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -2.0963155095e+01 +1.9228134974e+00 -2.4154325337e+02 2.4245885080e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.26275207E+02 +0 + 1 21 0.11335513E-01 0.26275207E+02 + 1 2 0.66271366E-01 0.26275207E+02 + 0.56080712E+04 + + + + 5 1 +6.6372287e+07 4.55308900e+01 7.54677100e-03 1.47105400e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +2.1459625930e+03 2.1459625930e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -5.5418446222e+00 5.5418446222e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +3.4560110742e+01 -1.7822362191e+01 +1.6729667012e+02 1.7175626242e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +8.6373215770e+00 +3.7927160061e+01 +1.2285930833e+02 1.2887002149e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -4.3197432319e+01 -2.0104797870e+01 +1.8502647699e+03 1.8508781537e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.45530891E+02 +0 + 1 21 0.85259319E-03 0.45530891E+02 + 1 2 0.33014743E+00 0.45530891E+02 + 0.37889394E+05 + + + + 5 1 +6.6372287e+07 3.49649300e+01 7.54677100e-03 1.54891200e-01 + -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +8.0100600886e+00 8.0100600886e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 -1 0 0 503 0 +0.0000000000e+00 -0.0000000000e+00 -1.6748462249e+03 1.6748462249e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 -2.8388621465e+01 +1.5105638110e+01 -1.9595061691e+02 1.9857174623e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 502 0 +5.9035355306e+00 -3.7141587409e+01 -1.4389449039e+03 1.4394362736e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -2 1 1 2 0 502 +2.2485085934e+01 +2.2035949299e+01 -3.1940644021e+01 4.4848265200e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.34964932E+02 +0 + 1 2 0.25766864E+00 0.34964932E+02 + 1 -2 0.12323170E-02 0.34964932E+02 + 
0.15263237E+04 + + + + 5 1 +6.6372287e+07 3.04072400e+01 7.54677100e-03 1.59363000e-01 + 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +2.3031354025e+01 2.3031354025e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.8224559169e+02 1.8224559169e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +2.5754317368e+01 -4.7408923451e+01 -9.5689678327e+01 1.0985174293e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -1.1009373966e+01 +2.4882397341e+01 -8.6110655342e-01 2.7222812438e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 503 -1.4744943402e+01 +2.2526526110e+01 -6.2663452789e+01 6.8202390354e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.30407236E+02 +0 + 1 21 0.35432851E-02 0.30407236E+02 + 1 21 0.28037785E-01 0.30407236E+02 + 0.50703811E+06 + + + + 5 1 +6.6372287e+07 2.46316000e+01 7.54677100e-03 1.66635000e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +8.0590561410e+01 8.0590561410e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -6.2809489236e+02 6.2809489236e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -1.7815991771e+01 +2.0155583443e+01 -8.5520591269e+00 2.8227554305e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 -7.5126314979e+00 -1.9813252642e+01 -6.0724710539e+02 6.0761669795e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 503 +2.5328623269e+01 -3.4233080119e-01 +6.8294833568e+01 7.2841201522e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.24631597E+02 +0 + 1 21 0.96629979E-01 0.24631597E+02 + 1 -1 0.12398548E-01 0.24631597E+02 + 0.37172940E+03 + + + + 5 1 +6.6372287e+07 2.48386400e+01 7.54677100e-03 1.66333000e-01 + 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +3.4621419117e+02 3.4621419117e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -1.8471995540e+01 1.8471995540e+01 0.0000000000e+00 
0.0000e+00 -1.0000e+00 + 21 1 1 2 504 503 +2.4946999999e+01 +1.5290445725e+00 +2.8662250343e+02 2.8771018449e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 501 0 -1.3234624373e+00 -2.4231607655e+01 -1.6739991815e-01 2.4268300005e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -2 1 1 2 0 502 -2.3623537562e+01 +2.2702563083e+01 +4.1287092120e+01 5.2707702219e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.24838643E+02 +0 + 1 21 0.53263717E-01 0.24838643E+02 + 1 21 0.28418457E-02 0.24838643E+02 + 0.20767655E+06 + + + + 5 1 +6.6372287e+07 3.39483100e+01 7.54677100e-03 1.55814300e-01 + -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +2.5448573077e+01 2.5448573077e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 -1 0 0 503 0 +0.0000000000e+00 -0.0000000000e+00 -1.0820396951e+03 1.0820396951e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +4.5361883356e+01 +2.5711927708e+01 -5.3689272592e+02 5.3941876389e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 502 0 -1.6783464189e+01 -2.3932337766e+01 -5.3486766152e+02 5.3566580701e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -2 1 1 2 0 502 -2.8578419167e+01 -1.7795899428e+00 +1.5169265433e+01 3.2403697261e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.33948308E+02 +0 + 1 2 0.16646767E+00 0.33948308E+02 + 1 -2 0.39151646E-02 0.33948308E+02 + 0.59650818E+03 + + + + 5 1 +6.6372287e+07 4.00572800e+01 7.54677100e-03 1.50779000e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +3.2066229463e+01 3.2066229463e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.0083738526e+02 3.0083738526e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 +3.4238964599e+01 -3.1475020468e+00 -2.1121471239e+02 2.1399501909e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 -1.6612873637e+01 +3.9603631259e+01 +6.1874354643e+00 4.3390316167e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 502 -1.7626090962e+01 -3.6456129213e+01 
-6.3743878874e+01 7.5518279467e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.40057279E+02 +0 + 1 21 0.49332658E-02 0.40057279E+02 + 1 21 0.46282677E-01 0.40057279E+02 + 0.11855536E+06 + + + + 5 1 +6.6372287e+07 4.37051900e+01 7.54677100e-03 1.48259100e-01 + 2 -1 0 0 502 0 +0.0000000000e+00 +0.0000000000e+00 +1.7110304904e+03 1.7110304904e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -2.0783382913e+01 2.0783382913e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -1.4418127206e+01 +2.0747890384e+01 +7.9570356529e+01 8.3485321978e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 +3.6111610870e+01 -3.0183395268e+01 +1.6215922235e+03 1.6222750769e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 1 1 2 502 0 -2.1693483664e+01 +9.4355048838e+00 -1.0915472595e+01 2.6053474392e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.43705192E+02 +0 + 1 2 0.26323535E+00 0.43705192E+02 + 1 1 0.31974449E-02 0.43705192E+02 + 0.43186860E+03 + + + + 5 1 +6.6372287e+07 3.25233300e+01 7.54677100e-03 1.57176200e-01 + 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +2.1530668898e+01 2.1530668898e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -5.8911499310e+02 5.8911499310e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 +3.1914103860e+01 -3.6894354070e+01 -1.3312376381e+02 1.4178025208e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 -1.0765125773e+01 +2.9189748902e+01 +2.6051922163e+00 3.1220448433e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -2.1148978087e+01 +7.7046051684e+00 -4.3706575261e+02 4.3764496149e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.32523330E+02 +0 + 1 21 0.33124105E-02 0.32523330E+02 + 1 2 0.90633079E-01 0.32523330E+02 + 0.30633976E+05 + + + + 5 1 +6.6372287e+07 3.61852100e+01 7.54677100e-03 1.53832100e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +4.3002025114e+01 
4.3002025114e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.3404139915e+02 3.3404139915e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 505 +5.4545664238e+00 -2.1319807632e+01 -2.9481524350e+02 2.9563544153e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +3.1100319594e+01 -8.7134369374e+00 +9.7127797801e+00 3.3726724614e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 -3.6554886018e+01 +3.0033244569e+01 -5.9369103163e+00 4.7681258113e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.36185209E+02 +0 + 1 21 0.66156963E-02 0.36185209E+02 + 1 21 0.51390983E-01 0.36185209E+02 + 0.59410236E+05 + + + + 5 1 +6.6372287e+07 2.68952500e+01 7.54677100e-03 1.63518300e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +2.8770752959e+02 2.8770752959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -2.6931152162e+02 2.6931152162e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +2.0961573832e+01 +2.3688081609e+00 -1.7971774862e+02 1.8095156257e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 505 -3.7349663467e+00 -2.3130388947e+01 -8.6786083310e+01 8.9893209547e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 -1.7226607485e+01 +2.0761580786e+01 +2.8489983990e+02 2.8617427909e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.26895249E+02 +0 + 1 21 0.44262697E-01 0.26895249E+02 + 1 21 0.41432541E-01 0.26895249E+02 + 0.32158164E+04 + + + + 5 1 +6.6372287e+07 2.51016900e+01 7.54677100e-03 1.65954600e-01 + 2 -1 0 0 504 0 -0.0000000000e+00 +0.0000000000e+00 +6.4617848855e+01 6.4617848855e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -4.5852280566e+01 4.5852280566e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +1.2633388858e+01 +1.7296317379e+01 -2.9732559349e+01 3.6644101767e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 
503 +1.3130515800e+01 -2.4553339855e+01 +1.7570358035e+01 3.2924070597e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -2.5763904658e+01 +7.2570224764e+00 +3.0927769604e+01 4.0901957057e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.25101687E+02 +0 + 1 21 0.70541970E-02 0.25101687E+02 + 1 2 0.99412075E-02 0.25101687E+02 + 0.89083039E+05 + + + + 5 1 +6.6372287e+07 2.65415900e+01 7.54677100e-03 1.63979800e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +7.3037786153e+01 7.3037786153e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -8.2017257442e+01 8.2017257442e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 505 +1.8697084487e+01 -8.2924898880e+00 -5.9625078565e+01 6.3035675222e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +6.9951105287e+00 +2.5911255642e+01 +5.4380851637e+01 6.0643233464e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 -2.5692195015e+01 -1.7618765754e+01 -3.7352443596e+00 3.1376134909e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.26541591E+02 +0 + 1 21 0.11236582E-01 0.26541591E+02 + 1 21 0.12618040E-01 0.26541591E+02 + 0.30903565E+06 + + + + 5 1 +6.6372287e+07 2.27761200e+01 7.54677100e-03 1.69516500e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +8.6173848945e+01 8.6173848945e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -6.3585034087e+01 6.3585034087e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -2.7032116927e+01 +8.2973252626e+00 +1.6307827832e+01 3.2642398819e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +1.9247818195e+01 +1.2926007751e+01 -5.0735466398e+01 5.5782145282e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 503 +7.7842987316e+00 -2.1223333014e+01 +5.7016453425e+01 6.1334338931e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.22776118E+02 +0 + 1 21 0.13257515E-01 0.22776118E+02 + 1 21 0.97823130E-02 
0.22776118E+02 + 0.35046139E+06 + + + + 5 1 +6.6372287e+07 3.80456700e+01 7.54677100e-03 1.52310600e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.3757684306e+01 1.3757684306e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.9694458511e+02 3.9694458511e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 505 -1.4438846203e+01 -1.7702498483e+01 -8.3619869477e+01 8.6684146783e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +1.4547368666e+01 -2.2261281669e+01 -2.7669903846e+02 2.7797400684e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 -1.0852246356e-01 +3.9963780152e+01 -2.2867992874e+01 4.6044115794e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.38045671E+02 +0 + 1 21 0.21165667E-02 0.38045671E+02 + 1 21 0.61068400E-01 0.38045671E+02 + 0.26069372E+06 + + + + 5 1 +6.6372287e+07 4.59126200e+01 7.54677100e-03 1.46872300e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +2.0311609080e+03 2.0311609080e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -2.4721053331e+01 2.4721053331e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +5.4574055215e+00 +4.5277462040e+01 +1.8656515221e+03 1.8662088398e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +3.1344441190e+01 +1.9716704689e+01 +9.5251287709e+01 1.0219603832e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 -3.6801846712e+01 -6.4994166729e+01 +4.5537044828e+01 8.7477083181e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.45912622E+02 +0 + 1 21 0.31248616E+00 0.45912622E+02 + 1 21 0.38032406E-02 0.45912622E+02 + 0.16431983E+04 + + + + 5 1 +6.6372287e+07 3.23160000e+01 7.54677100e-03 1.57381400e-01 + 1 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +5.8873030751e+02 5.8873030751e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -2.0960881505e+01 2.0960881505e+01 
0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 +1.2271827274e+01 +2.2294515262e+01 +1.4647225131e+02 1.4866661885e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +2.0219290079e+01 -9.7210766525e-02 -1.3589093295e+01 2.4361703508e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 1 1 2 501 0 -3.2491117354e+01 -2.2197304495e+01 +4.3488626799e+02 4.3666286666e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.32316004E+02 +0 + 1 21 0.32247515E-02 0.32316004E+02 + 1 1 0.90573881E-01 0.32316004E+02 + 0.20132875E+05 + + + + 5 1 +6.6372287e+07 4.71162200e+01 7.54677100e-03 1.46154700e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +3.5698751231e+01 3.5698751231e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.5024197302e+02 1.5024197302e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 505 -1.8393329399e+01 -2.0730266036e+01 +1.5134275211e+01 3.1576966011e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +3.0318818959e+01 +4.1981229081e+01 -1.1447233348e+02 1.2564063639e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 -1.1925489561e+01 -2.1250963045e+01 -1.5205163517e+01 2.8723121856e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.47116224E+02 +0 + 1 21 0.54921154E-02 0.47116224E+02 + 1 21 0.23114150E-01 0.47116224E+02 + 0.37322948E+06 + + + + 5 1 +6.6372287e+07 5.00477800e+01 7.54677100e-03 1.44508400e-01 + 2 -1 0 0 503 0 +0.0000000000e+00 +0.0000000000e+00 +3.8911714874e+02 3.8911714874e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 502 0 -0.0000000000e+00 -0.0000000000e+00 -3.0237812812e+02 3.0237812812e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +2.7035206944e+01 +1.1996257553e+01 +1.2886486117e+02 1.3221560064e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -2.2748413249e+01 -4.4788353833e+01 -2.9753120134e+02 3.0174211293e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 502 0 -4.2867936948e+00 
+3.2792096280e+01 +2.5540536079e+02 2.5753756328e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.50047775E+02 +0 + 1 2 0.59864176E-01 0.50047775E+02 + 1 2 0.46519712E-01 0.50047775E+02 + 0.13730376E+03 + + + + 5 1 +6.6372287e+07 2.97093000e+01 7.54677100e-03 1.60132400e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.1854806368e+02 1.1854806368e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -2.8789738375e+02 2.8789738375e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -2.1808462259e+01 +1.9091314842e+01 +2.7760747717e+01 4.0134105724e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +5.0913227713e+01 -2.3763320633e+01 +7.4385366719e+01 9.3220356927e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 -2.9104765455e+01 +4.6720057908e+00 -2.7149543450e+02 2.7309098477e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.29709305E+02 +0 + 1 21 0.18238164E-01 0.29709305E+02 + 1 1 0.44291905E-01 0.29709305E+02 + 0.26942502E+04 + + + + 5 1 +6.6372287e+07 2.76081800e+01 7.54677100e-03 1.62613900e-01 + 1 -1 0 0 501 0 +0.0000000000e+00 +0.0000000000e+00 +3.5918383131e+02 3.5918383131e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -4 -1 0 0 0 501 -0.0000000000e+00 -0.0000000000e+00 -9.9599640123e+00 9.9599640123e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -1.0226056517e+01 -1.7566903028e+01 +5.4113018863e+01 5.7804732716e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 502 0 -1.8623508510e+01 -1.7954792984e+01 +2.3170430073e+02 2.3314393117e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -4 1 1 2 0 503 +2.8849565027e+01 +3.5521696012e+01 +6.3406547700e+01 7.8195131441e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.27608180E+02 +0 + 1 1 0.55259041E-01 0.27608180E+02 + 1 -4 0.15323024E-02 0.27608180E+02 + 0.31455192E+04 + + + + 5 1 +6.6372287e+07 3.28240500e+01 7.54677100e-03 1.56881800e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 
+5.7881981423e+00 5.7881981423e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.2709795693e+03 1.2709795693e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -2.9221921883e+01 +8.0360733545e+00 -5.6807285970e+02 5.6888071959e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +1.6963846300e+01 -2.9307832371e+01 -5.0500567831e+01 6.0803194577e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 503 +1.2258075583e+01 +2.1271759017e+01 -6.4661794361e+02 6.4708385326e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.32824047E+02 +0 + 1 21 0.89049153E-03 0.32824047E+02 + 1 21 0.19553543E+00 0.32824047E+02 + 0.68369125E+05 + + + + 5 1 +6.6372287e+07 2.96747300e+01 7.54677100e-03 1.60171200e-01 + 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +4.0441816137e+01 4.0441816137e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.0683900114e+02 1.0683900114e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -2.6554357024e+01 +3.2973328106e+01 +7.1332905671e+00 4.2933181546e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 +2.0427248287e+01 -1.1559769425e+01 +3.8860694556e+00 2.3790802373e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 501 0 +6.1271087369e+00 -2.1413558681e+01 -7.7416545026e+01 8.0556833358e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.29674731E+02 +0 + 1 21 0.62218180E-02 0.29674731E+02 + 1 2 0.16436769E-01 0.29674731E+02 + 0.63902127E+05 + + + + 5 1 +6.6372287e+07 2.77849600e+01 7.54677100e-03 1.62394800e-01 + -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +5.5901367143e+01 5.5901367143e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 4 -1 0 0 501 0 +0.0000000000e+00 -0.0000000000e+00 -1.6721775392e+02 1.6721775392e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -1.7214919673e+01 +1.5725661972e+01 -4.6993551561e+00 2.3785160136e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 4 
1 1 2 502 0 -7.1368635003e+00 -2.7166369610e+01 -1.4763278291e+02 1.5028102025e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -2 1 1 2 0 503 +2.4351783173e+01 +1.1440707638e+01 +4.1015751290e+01 4.9052940675e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.27784960E+02 +0 + 1 4 0.25725808E-01 0.27784960E+02 + 1 -2 0.86002106E-02 0.27784960E+02 + 0.26447976E+03 + + + + 5 1 +6.6372287e+07 5.39590300e+01 7.54677100e-03 1.42508700e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +5.9800586950e+01 5.9800586950e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -7.5246990846e+01 7.5246990846e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 505 +1.7215966258e+01 -1.3481800279e+01 -1.3120982800e+01 2.5501149436e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 503 -4.4948096317e+01 +2.7284215874e+01 +3.3376063191e+01 6.2279381760e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 502 +2.7732130059e+01 -1.3802415596e+01 -3.5701484287e+01 4.7267046599e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.53959031E+02 +0 + 1 21 0.92000904E-02 0.53959031E+02 + 1 21 0.11576460E-01 0.53959031E+02 + 0.54280239E+06 + + + + 5 1 +6.6372287e+07 3.74932000e+01 7.54677100e-03 1.52751300e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +7.5616997299e+02 7.5616997299e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -9.5383624010e+00 9.5383624010e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +2.4198598086e+01 -2.8579386523e+01 +4.8877048754e+01 6.1573690634e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -4.1183674764e+01 +1.1831705659e+01 +1.5380528884e+02 1.5966261679e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 504 +1.6985076678e+01 +1.6747680864e+01 +5.4394927300e+02 5.4447202797e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.37493203E+02 +0 + 1 21 0.11633379E+00 0.37493203E+02 + 1 21 
0.14674410E-02 0.37493203E+02 + 0.11386933E+06 + + + + 5 1 +6.6372287e+07 4.04284600e+01 7.54677100e-03 1.50508200e-01 + 21 -1 0 0 502 503 +0.0000000000e+00 +0.0000000000e+00 +4.0351583823e+02 4.0351583823e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 4 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.3201441918e+01 1.3201441918e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 4 1 1 2 501 0 +4.2439120380e+01 +1.6559287496e+01 +4.1887418346e+01 6.1885739526e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 4 1 1 2 502 0 -2.1117804890e+01 +8.9613985453e+00 +2.4763202138e+02 2.4869235283e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -4 1 1 2 0 501 -2.1321315491e+01 -2.5520686042e+01 +1.0079495659e+02 1.0613918778e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.40428461E+02 +0 + 1 21 0.62079350E-01 0.40428461E+02 + 1 4 0.20309914E-02 0.40428461E+02 + 0.80181641E+04 + + + + 5 1 +6.6372287e+07 3.42643000e+01 7.54677100e-03 1.55523200e-01 + 1 -1 0 0 504 0 -0.0000000000e+00 +0.0000000000e+00 +3.3344584104e+03 3.3344584104e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -9.8032652357e-01 9.8032652357e-01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -1.6378085835e+01 +1.3734600814e+01 +2.6780940543e+02 2.6866104777e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -2.1482442393e+01 -1.5710524314e+01 +4.2265106582e+02 4.2348818088e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 +3.7860528228e+01 +1.9759235008e+00 +2.6430176126e+03 2.6432895082e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.34264300E+02 +0 + 1 21 0.15082213E-03 0.34264300E+02 + 1 1 0.51298452E+00 0.34264300E+02 + 0.33282672E+05 + + + + 5 1 +6.6372287e+07 1.19571300e+02 7.54677100e-03 1.24380600e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +8.7854154861e+01 8.7854154861e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 503 0 +0.0000000000e+00 -0.0000000000e+00 -1.7934232150e+03 1.7934232150e+03 
0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +1.7605310395e+01 +1.3701422770e+01 +2.6274785914e+01 3.4467960701e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 502 0 +1.1756210603e+02 +1.7577983299e+01 -1.7004020433e+03 1.7045518306e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 502 -1.3516741643e+02 -3.1279406068e+01 -3.1441802775e+01 1.4225757860e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.11957126E+03 +0 + 1 2 0.27591127E+00 0.11957126E+03 + 1 -1 0.13516023E-01 0.11957126E+03 + 0.43015636E+02 + + + + 5 1 +6.6372287e+07 2.58481600e+01 7.54677100e-03 1.64911000e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.4121175553e+01 1.4121175553e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -4.5285316425e+02 4.5285316425e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 505 +1.4043566251e+01 +1.9773590139e+01 -2.4347029469e+02 2.4467529711e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 -2.6394599884e+01 -1.3369434684e+00 -6.6055666935e-01 2.6436691472e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 +1.2351033633e+01 -1.8436646670e+01 -1.9460113733e+02 1.9586235121e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.25848162E+02 +0 + 1 21 0.21724885E-02 0.25848162E+02 + 1 21 0.69669719E-01 0.25848162E+02 + 0.18427689E+06 + + + + 5 1 +6.6372287e+07 5.64794400e+01 7.54677100e-03 1.41322900e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +3.7881487994e+00 3.7881487994e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 -1 0 0 502 0 -0.0000000000e+00 -0.0000000000e+00 -8.3293306677e+02 8.3293306677e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +2.2042587179e+01 +4.9935512809e+01 -3.4469253261e+02 3.4898764036e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 -1.0400442402e+01 -2.1391345840e+01 -1.5133248122e+02 1.5319033504e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 -1.1642144777e+01 
-2.8544166969e+01 -3.3311990414e+02 3.3454324016e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.56479436E+02 +0 + 1 21 0.58279180E-03 0.56479436E+02 + 1 1 0.12814362E+00 0.56479436E+02 + 0.18915759E+06 + + + + 5 1 +6.6372287e+07 1.20437600e+02 7.54677100e-03 1.24237700e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +7.6326880933e+01 7.6326880933e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -5.6424004213e+02 5.6424004213e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 -1.3358633684e+02 -3.2803021929e+01 -2.5791576009e+01 1.3995196687e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 +9.1619991808e+01 +2.3833933827e+01 -1.0682884494e+02 1.4273990827e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 1 1 2 501 0 +4.1966345031e+01 +8.9690881026e+00 -3.5529274025e+02 3.5787504793e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.12043761E+03 +0 + 1 21 0.11742597E-01 0.12043761E+03 + 1 1 0.86806160E-01 0.12043761E+03 + 0.26061044E+04 + + + + 5 1 +6.6372287e+07 5.37227900e+01 7.54677100e-03 1.42623700e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.3304377309e+02 1.3304377309e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -7.0963777946e+01 7.0963777946e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 -1.5841020033e+01 +3.7513476754e+01 -1.3490766536e+01 4.2897548130e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 -5.0497052458e+01 -2.8085691040e+00 +6.9558481825e+01 8.6001295113e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 +6.6338072490e+01 -3.4704907650e+01 +6.0122798510e+00 7.5108707789e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.53722789E+02 +0 + 1 21 0.20468273E-01 0.53722789E+02 + 1 21 0.10917504E-01 0.53722789E+02 + 0.14842924E+06 + + + + 5 1 +6.6372287e+07 4.63666500e+01 7.54677100e-03 1.46598600e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 
+0.0000000000e+00 +7.9404929403e+01 7.9404929403e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -4.0571876305e+01 4.0571876305e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +3.8096004013e+01 -2.0103472745e+01 +5.5898111257e+01 7.0569497520e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 -2.4864947573e+01 +3.4826673187e+00 -8.6196401551e+00 2.6546050287e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 502 -1.3231056440e+01 +1.6620805427e+01 -8.4454180038e+00 2.2861257901e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.46366646E+02 +0 + 1 21 0.12216143E-01 0.46366646E+02 + 1 21 0.62418271E-02 0.46366646E+02 + 0.93143569E+06 + + + + 5 1 +6.6372287e+07 3.23809700e+01 7.54677100e-03 1.57316900e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.7524908213e+01 2.7524908213e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -1.6414512109e+02 1.6414512109e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 505 +1.0735752809e+01 +2.3888282841e+01 -4.2028929504e+01 4.9521079963e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 +8.4348718406e+00 -4.8157198822e+01 -9.9473441559e+01 1.1083874971e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 -1.9170624649e+01 +2.4268915981e+01 +4.8821581875e+00 3.1310199631e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.32380969E+02 +0 + 1 21 0.42346013E-02 0.32380969E+02 + 1 21 0.25253095E-01 0.32380969E+02 + 0.46542943E+06 + + + + 5 1 +6.6372287e+07 3.04619800e+01 7.54677100e-03 1.59303800e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +2.2711935090e+03 2.2711935090e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -2.7585117854e+00 2.7585117854e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +2.0648195639e+01 -1.1330743851e+01 +3.5163773314e+02 3.5242563628e+02 
0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -2.3394837542e+01 -2.0181653530e+01 +1.0496115767e+02 1.0941417724e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 +2.7466419032e+00 +3.1512397381e+01 +1.8118361064e+03 1.8121122073e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.30461979E+02 +0 + 1 21 0.42438824E-03 0.30461979E+02 + 1 2 0.34941289E+00 0.30461979E+02 + 0.91450792E+05 + + + + 5 1 +6.6372287e+07 5.05952900e+01 7.54677100e-03 1.44215700e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.7310225510e+01 1.7310225510e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 502 0 -0.0000000000e+00 -0.0000000000e+00 -1.2474989233e+03 1.2474989233e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 -2.0960667079e+01 -3.2621697190e+00 -1.3563546253e-01 2.1213432364e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -3.0621090753e+01 +1.1876026904e+01 -3.7308678078e+01 4.9705418959e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 +5.1581757832e+01 -8.6138571850e+00 -1.1927443843e+03 1.1938902975e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.50595294E+02 +0 + 1 21 0.26631115E-02 0.50595294E+02 + 1 2 0.19192292E+00 0.50595294E+02 + 0.17875597E+05 + + + + 5 1 +6.6372287e+07 5.35686700e+01 7.54677100e-03 1.42699200e-01 + 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +2.0775922111e+01 2.0775922111e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.9382658620e+02 1.9382658620e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 -2.0032585171e+01 +8.2484330843e+00 -1.7029580071e+01 2.7556264521e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 -3.0721524725e+01 +4.2318877201e+00 -1.1017363691e+01 3.2910534147e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 504 +5.0754109896e+01 -1.2480320804e+01 -1.4500372033e+02 1.5413570964e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.53568673E+02 +0 + 1 21 
0.31962958E-02 0.53568673E+02 + 1 21 0.29819474E-01 0.53568673E+02 + 0.56798716E+06 + + + + 5 1 +6.6372287e+07 3.85025300e+01 7.54677100e-03 1.51953000e-01 + 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +2.2289530809e+02 2.2289530809e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 3 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.1799706113e+01 1.1799706113e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +2.1037505294e+01 +3.1358606708e+01 +8.1763268338e+01 9.0062039134e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 +1.7522488983e+01 -1.3904294537e+01 +4.7427977092e+01 5.2438345109e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 3 1 1 2 501 0 -3.8559994278e+01 -1.7454312171e+01 +8.1904356543e+01 9.2194629956e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.38502528E+02 +0 + 1 21 0.34291583E-01 0.38502528E+02 + 1 3 0.18153396E-02 0.38502528E+02 + 0.34255031E+05 + + + + 5 1 +6.6372287e+07 3.75647200e+01 7.54677100e-03 1.52693700e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +5.1007388093e+00 5.1007388093e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.1898429651e+03 1.1898429651e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -2.3577055731e+01 +8.6164758199e+00 -5.6840562318e+02 5.6895964152e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -1.7661209766e+01 -3.2338872055e+01 -8.6569325177e+01 9.4084903347e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 504 +4.1238265497e+01 +2.3722396235e+01 -5.2976727797e+02 5.3189915908e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.37564724E+02 +0 + 1 21 0.78472893E-03 0.37564724E+02 + 1 21 0.18305279E+00 0.37564724E+02 + 0.98499507E+05 + + + + 5 1 +6.6372287e+07 1.43855200e+02 7.54677100e-03 1.20823400e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.7613958321e+02 1.7613958321e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 503 -0.0000000000e+00 
-0.0000000000e+00 -2.5721546644e+02 2.5721546644e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +1.8697441469e+01 +4.6743956885e+01 +6.3444037986e+01 8.0992208136e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 1 1 2 501 0 +1.5605198954e+01 -1.3052773015e+02 -2.0496954320e+02 2.4350261644e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -1 1 1 2 0 504 -3.4302640423e+01 +8.3783773266e+01 +6.0449621991e+01 1.0886022507e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.14385516E+03 +0 + 1 21 0.27098398E-01 0.14385516E+03 + 1 21 0.39571610E-01 0.14385516E+03 + 0.81522626E+04 + + + + 5 1 +6.6372287e+07 2.99475600e+01 7.54677100e-03 1.59866900e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.1735167452e+02 1.1735167452e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -6.7731005686e+01 6.7731005686e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -3.0037339775e+01 +1.9906567203e+01 +7.7442255748e+01 8.5415549954e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 505 +3.9581584148e+00 -2.3241579138e+01 -5.6222098856e+01 6.0965255831e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 +2.6079181360e+01 +3.3350119355e+00 +2.8400511938e+01 3.8701874417e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.29947557E+02 +0 + 1 21 0.18054104E-01 0.29947557E+02 + 1 21 0.10420155E-01 0.29947557E+02 + 0.19218372E+06 + + + + 5 1 +6.6372287e+07 5.51267100e+01 7.54677100e-03 1.41950100e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +3.3605621517e+02 3.3605621517e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.6748192173e+02 1.6748192173e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -3.6502084271e+01 +1.2779288039e+01 +2.6845966100e+00 3.8767504664e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 505 +5.6330187720e+01 -2.0382049136e+01 -1.4305805077e+02 1.5509391950e+02 0.0000000000e+00 
0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 -1.9828103448e+01 +7.6027610964e+00 +3.0894774760e+02 3.0967671273e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.55126711E+02 +0 + 1 21 0.51700956E-01 0.55126711E+02 + 1 21 0.25766450E-01 0.55126711E+02 + 0.55367291E+04 + + + + 5 1 +6.6372287e+07 2.38060800e+01 7.54677100e-03 1.67876700e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +5.6292151055e+01 5.6292151055e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -1.0100350383e+02 1.0100350383e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 -1.7078783867e+01 +1.3980353782e+01 +4.3115131723e+01 4.8436037552e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 +1.9550036137e+01 +1.1807248332e+01 -6.8557268747e+00 2.3845670826e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 -2.4712522707e+00 -2.5787602114e+01 -8.0970757619e+01 8.5013946502e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.23806083E+02 +0 + 1 21 0.86603311E-02 0.23806083E+02 + 1 21 0.15539000E-01 0.23806083E+02 + 0.32926995E+06 + + + + 5 1 +6.6372287e+07 4.37816800e+01 7.54677100e-03 1.48209400e-01 + -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +7.6069687389e+01 7.6069687389e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -6.3690305890e+01 6.3690305890e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -3.1851837126e+01 -2.0905814741e+01 -1.0380698209e+01 3.9488625117e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 +5.2242018124e+01 +1.1087919075e+01 -1.5683784728e+01 5.5661041227e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -2 1 1 2 0 503 -2.0390180998e+01 +9.8178956656e+00 +3.8443864437e+01 4.4610326936e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.43781676E+02 +0 + 1 21 0.97985087E-02 0.43781676E+02 + 1 -2 0.11703029E-01 0.43781676E+02 + 0.29443686E+05 + + + + 5 1 +6.6372287e+07 6.86898000e+01 7.54677100e-03 
1.36460600e-01 + 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +2.1667938332e+02 2.1667938332e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.5467804492e+02 1.5467804492e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 505 -1.1038053112e+01 +2.3433804553e+01 -2.7774504919e+01 3.7979006514e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 -1.5088204651e+01 +3.2631253632e+01 -1.1446451215e+02 1.1997740276e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 +2.6126257764e+01 -5.6065058184e+01 +2.0424035546e+02 2.1340101896e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.68689805E+02 +0 + 1 21 0.33335290E-01 0.68689805E+02 + 1 21 0.23796622E-01 0.68689805E+02 + 0.15057095E+05 + + + + 5 1 +6.6372287e+07 5.62723200e+01 7.54677100e-03 1.41417600e-01 + -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.2380787192e+02 1.2380787192e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -2.0327409785e+03 2.0327409785e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +2.3729827285e+01 -2.7343988978e+01 -1.7426108947e+03 1.7429869560e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -5.3672579255e+00 -2.5936900616e+01 -2.8286809267e+02 2.8410541727e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -2 1 1 2 0 502 -1.8362569359e+01 +5.3280889594e+01 +1.1654588079e+02 1.2945647716e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.56272325E+02 +0 + 1 21 0.31272939E+00 0.56272325E+02 + 1 -2 0.19047365E-01 0.56272325E+02 + 0.69988168E+01 + + + + 5 1 +6.6372287e+07 3.06264600e+01 7.54677100e-03 1.59126500e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +7.3143843443e+00 7.3143843443e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -2.8991776011e+02 2.8991776011e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -2.7176763448e+01 -5.9122717629e+00 
-8.2589250105e+01 8.7146518358e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +3.7114675978e+00 -2.1312114619e+01 -3.4556109190e+01 4.0768933069e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 504 +2.3465295850e+01 +2.7224386382e+01 -1.6545801647e+02 1.6931669302e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.30626458E+02 +0 + 1 21 0.11252898E-02 0.30626458E+02 + 1 21 0.44602736E-01 0.30626458E+02 + 0.12571056E+07 + + + + 5 1 +6.6372287e+07 2.39754200e+01 7.54677100e-03 1.67617000e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +3.1086457142e+00 3.1086457142e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -1.1096565876e+03 1.1096565876e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +1.8074701851e+01 -1.2471200900e+01 -4.7301063874e+01 5.2149940964e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -1.5797907864e+01 -1.5878819740e+01 -2.9957929976e+02 3.0041549170e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -2.2767939866e+00 +2.8350020640e+01 -7.5966757822e+02 7.6019980061e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.23975415E+02 +0 + 1 21 0.47825339E-03 0.23975415E+02 + 1 2 0.17071633E+00 0.23975415E+02 + 0.26724670E+06 + + + + 5 1 +6.6372287e+07 2.53344800e+01 7.54677100e-03 1.65624400e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +5.1570098369e+02 5.1570098369e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -1 -1 0 0 0 501 -0.0000000000e+00 -0.0000000000e+00 -1.8445706221e+02 1.8445706221e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 +2.4207533199e+01 -7.3828354628e+00 +5.0432335498e+02 5.0495797578e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 +8.1618412819e+00 -2.1011090789e+01 -6.9595529015e+00 2.3590611815e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 504 -3.2369374481e+01 +2.8393926251e+01 -1.6611988060e+02 1.7160945831e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 
0.25334477E+02 +0 + 1 21 0.79338612E-01 0.25334477E+02 + 1 -1 0.28378010E-01 0.25334477E+02 + 0.16118883E+03 + + + + 5 1 +6.6372287e+07 4.96978700e+01 7.54677100e-03 1.44697800e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +4.1669869678e+01 4.1669869678e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -7.2169785693e+02 7.2169785693e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -1.0137451528e+01 -1.9749989160e+01 -4.1326305480e+02 4.1385889197e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 +7.1118147579e-01 -2.6001039854e+01 -2.9419229654e+02 2.9533991805e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -1 1 1 2 0 504 +9.4262700527e+00 +4.5751029015e+01 +2.7427364092e+01 5.4168916585e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.49697867E+02 +0 + 1 21 0.11103045E+00 0.49697867E+02 + 1 -1 0.64107485E-02 0.49697867E+02 + 0.70621395E+03 + + + + 5 1 +6.6372287e+07 4.01879700e+01 7.54677100e-03 1.50683300e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +7.1802510669e+01 7.1802510669e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -7.1337629671e+01 7.1337629671e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +5.8213069507e+00 +5.5036070403e+01 -5.3681175091e+00 5.5602817785e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 505 -7.7116246928e+00 -3.4472047188e+01 -2.9891185989e+01 4.6273904012e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 +1.8903177421e+00 -2.0564023215e+01 +3.5724184496e+01 4.1263418543e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.40187974E+02 +0 + 1 21 0.11046540E-01 0.40187974E+02 + 1 21 0.10975020E-01 0.40187974E+02 + 0.42308800E+06 + + + + 5 1 +6.6372287e+07 2.09126700e+01 7.54677100e-03 1.72776500e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.1568669659e+02 2.1568669659e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 
-0.0000000000e+00 -0.0000000000e+00 -4.4598327407e+01 4.4598327407e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +1.0489102981e+01 +2.1031084145e+01 +3.3096570840e+00 2.3733554552e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 505 -2.0964401963e+01 -3.6307327249e+00 +1.9854727555e+02 1.9968402289e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 502 +1.0475298982e+01 -1.7400351420e+01 -3.0768563453e+01 3.6867446552e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.20912670E+02 +0 + 1 21 0.33182568E-01 0.20912670E+02 + 1 21 0.68612813E-02 0.20912670E+02 + 0.12384532E+06 + + + + 5 1 +6.6372287e+07 2.76487000e+01 7.54677100e-03 1.62563500e-01 + 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +8.8315219304e+00 8.8315219304e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -8.5548998670e+02 8.5548998670e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -2.6108367324e+01 +2.7834397992e+01 -5.0511958607e+01 6.3307649761e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 3 1 1 2 501 0 -1.5502416378e+00 -2.7227504772e+01 -8.3708945045e+01 8.8039353392e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -3 1 1 2 0 503 +2.7658608962e+01 -6.0689322000e-01 -7.1243756111e+02 7.1297450547e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.27648700E+02 +0 + 1 21 0.13586955E-02 0.27648700E+02 + 1 21 0.13161386E+00 0.27648700E+02 + 0.94752714E+05 + + + + 5 1 +6.6372287e+07 3.42335000e+01 7.54677100e-03 1.55551500e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +5.5344269194e+01 5.5344269194e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -5.5616791251e+02 5.5616791251e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 502 -5.3992539344e+01 -1.9093964990e+01 -4.2937502340e+02 4.3317742845e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 501 +2.0534474104e+01 +1.3341922147e+01 -1.1811465484e+02 1.2062646145e+02 
0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 503 +3.3458065241e+01 +5.7520428423e+00 +4.6666034926e+01 5.7708291798e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.34233500E+02 +0 + 1 21 0.85564297E-01 0.34233500E+02 + 1 -1 0.85145026E-02 0.34233500E+02 + 0.83709286E+03 + + + + 5 1 +6.6372287e+07 3.19061800e+01 7.54677100e-03 1.57792600e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +3.0941997221e+01 3.0941997221e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.4106991043e+02 1.4106991043e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -7.0637436764e+00 -2.8412231374e+01 -9.0188811038e+01 9.4821796033e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +1.2887679806e+01 +4.9262952225e+01 -2.1945566891e+01 5.5448522603e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 503 -5.8239361293e+00 -2.0850720851e+01 +2.0064647242e+00 2.1741589011e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.31906182E+02 +0 + 1 21 0.47603074E-02 0.31906182E+02 + 1 21 0.21703063E-01 0.31906182E+02 + 0.50542998E+06 + + + + 5 1 +6.6372287e+07 3.48184100e+01 7.54677100e-03 1.55021900e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +2.1589074046e+03 2.1589074046e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.5961454167e+01 1.5961454167e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 +2.0024453480e+01 +1.8334123955e+01 +1.9389015022e+03 1.9390915796e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 -3.0260374134e+01 +1.8529083324e+01 +8.4480836704e+00 3.6474474489e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 +1.0235920654e+01 -3.6863207279e+01 +1.9559636455e+02 1.9930280466e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.34818413E+02 +0 + 1 21 0.33213937E+00 0.34818413E+02 + 1 21 0.24556100E-02 0.34818413E+02 + 0.27565033E+04 + + + + 5 1 +6.6372287e+07 3.59920600e+01 
7.54677100e-03 1.53996300e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +9.7186356638e+01 9.7186356638e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -2.5874356624e+02 2.5874356624e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -1.8438172649e+01 -2.1235714101e+01 -8.7075130972e+01 9.1504099350e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 +5.0238810426e+01 +4.6817397829e+00 -1.6209235767e+02 1.6976392189e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 504 -3.1800637777e+01 +1.6553974318e+01 +8.7610279039e+01 9.4661901640e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.35992060E+02 +0 + 1 21 0.14951747E-01 0.35992060E+02 + 1 21 0.39806702E-01 0.35992060E+02 + 0.24770639E+05 + + + + 5 1 +6.6372287e+07 4.43353000e+01 7.54677100e-03 1.47853400e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +4.1509889175e+01 4.1509889175e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.5021838758e+02 1.5021838758e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +2.1728377512e+01 -4.5038835640e+00 +4.7828765006e+00 2.2699851631e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 +3.7288465851e+01 +1.8488032542e+01 -9.7938315943e+01 1.0641499313e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 502 -5.9016843363e+01 -1.3984148978e+01 -1.5553058964e+01 6.2613431996e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.44335296E+02 +0 + 1 21 0.63861368E-02 0.44335296E+02 + 1 21 0.23110521E-01 0.44335296E+02 + 0.28976826E+06 + + + + 5 1 +6.6372287e+07 8.95697900e+01 7.54677100e-03 1.30389700e-01 + -1 -1 0 0 0 502 -0.0000000000e+00 +0.0000000000e+00 +1.0959072147e+02 1.0959072147e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -4 -1 0 0 0 501 +0.0000000000e+00 -0.0000000000e+00 -2.3941333748e+02 2.3941333748e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +4.4433031586e+01 
-8.6837769029e+00 +3.1109742872e+01 5.4931943155e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -4 1 1 2 0 502 -7.9188610214e+01 +3.3845172129e+01 -2.1114031298e+02 2.2802754971e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 503 +3.4755578628e+01 -2.5161395226e+01 +5.0207954089e+01 6.6044566085e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.89569794E+02 +0 + 1 -4 0.36832821E-01 0.89569794E+02 + 1 -1 0.16860111E-01 0.89569794E+02 + 0.65999041E+02 + + + + 5 1 +6.6372287e+07 3.30531100e+01 7.54677100e-03 1.56660200e-01 + 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +7.4577216351e+00 7.4577216351e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -2.9293771215e+03 2.9293771215e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -8.0432547113e+00 +3.1734139686e+01 -3.2819056421e+01 4.6355582537e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 -6.2607331878e+00 +2.0255158476e+01 -2.7252772679e+02 2.7335111139e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 +1.4303987899e+01 -5.1989298162e+01 -2.6165726167e+03 2.6171281493e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.33053112E+02 +0 + 1 21 0.11473423E-02 0.33053112E+02 + 1 2 0.45067321E+00 0.33053112E+02 + 0.95448503E+04 + + + + 5 1 +6.6372287e+07 2.64822300e+01 7.54677100e-03 1.64058200e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +2.0433495267e+02 2.0433495267e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -1.1621962120e+01 1.1621962120e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -6.1453446806e-01 +2.8655724578e+01 +2.6926488695e+01 3.9326377880e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 +1.9294771642e+01 -8.5428375362e+00 +2.2933975834e+01 3.1164651987e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 -1.8680237174e+01 -2.0112887042e+01 +1.4285252602e+02 1.4546588493e+02 0.0000000000e+00 0.0000e+00 
-1.0000e+00 + + 3 0.26482232E+02 +0 + 1 21 0.17879943E-02 0.26482232E+02 + 1 2 0.31436144E-01 0.26482232E+02 + 0.22563568E+06 + + + + 5 1 +6.6372287e+07 2.94114000e+01 7.54677100e-03 1.60468700e-01 + 1 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +7.1456949129e+01 7.1456949129e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 -1 0 0 502 0 +0.0000000000e+00 -0.0000000000e+00 -3.2341513368e+01 3.2341513368e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -1.2633702387e+01 +1.6779116547e+01 +3.5382422326e+01 4.1146871057e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 +3.4850371802e+01 -4.6645490224e-01 +1.2205098572e-01 3.4853706995e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 502 0 -2.2216669414e+01 -1.6312661644e+01 +3.6109624495e+00 2.7797884444e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.29411403E+02 +0 + 1 2 0.49756174E-02 0.29411403E+02 + 1 1 0.10993377E-01 0.29411403E+02 + 0.84192782E+04 + + + + 5 1 +6.6372287e+07 4.12907800e+01 7.54677100e-03 1.49892200e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.6408792035e+01 1.6408792035e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -2.8426721197e+02 2.8426721197e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -9.0227533210e+00 +4.0120314210e+01 -1.6777925940e+02 1.7274527367e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 -2.8940986847e+01 -8.3287328478e+00 -3.5522385380e+01 4.6570252025e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 +3.7963740168e+01 -3.1791581362e+01 -6.4556775153e+01 8.1360478312e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.41290780E+02 +0 + 1 21 0.25244298E-02 0.41290780E+02 + 1 21 0.43733413E-01 0.41290780E+02 + 0.38789728E+06 + + + + 5 1 +6.6372287e+07 3.01075800e+01 7.54677100e-03 1.59690300e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.0837614737e+02 1.0837614737e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 -1 0 0 503 0 
+0.0000000000e+00 -0.0000000000e+00 -5.0187655412e+02 5.0187655412e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -2.4244911852e+01 +2.4839805338e+00 -4.4874897660e+01 5.1066058689e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 1 1 1 2 502 0 +9.4171551279e+00 +2.3796264682e+01 -4.5137403450e+02 4.5209895382e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -1 1 1 2 0 502 +1.4827756724e+01 -2.6280245215e+01 +1.0274852541e+02 1.0708768898e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.30107583E+02 +0 + 1 1 0.77211778E-01 0.30107583E+02 + 1 -1 0.16673253E-01 0.30107583E+02 + 0.11578566E+03 + + + + 5 1 +6.6372287e+07 2.75831300e+01 7.54677100e-03 1.62645100e-01 + 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +1.5119270639e+01 1.5119270639e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 504 503 -0.0000000000e+00 -0.0000000000e+00 -4.4797372094e+02 4.4797372094e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +2.3933439080e+01 -1.5349180970e+01 -7.6519214979e+00 2.9444163518e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 -9.1468995899e+00 +2.8848403826e+01 -5.3965016364e+01 6.1871796213e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 503 -1.4786539490e+01 -1.3499222856e+01 -3.7123751244e+02 3.7177703185e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.27583132E+02 +0 + 1 21 0.23260418E-02 0.27583132E+02 + 1 21 0.68919030E-01 0.27583132E+02 + 0.17059051E+06 + + + + 5 1 +6.6372287e+07 5.38232700e+01 7.54677100e-03 1.42574700e-01 + 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +8.6931327192e+02 8.6931327192e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.3878826862e+01 1.3878826862e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -1.1384767832e+01 -5.5414417532e+01 +5.7810173998e+02 5.8086314427e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 503 +4.8586431572e+01 +3.3136350907e+01 +2.3303180192e+02 2.4033826119e+02 
0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 504 -3.7201663740e+01 +2.2278066625e+01 +4.4300903156e+01 6.1990693318e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.53823273E+02 +0 + 1 21 0.13374046E+00 0.53823273E+02 + 1 21 0.21352048E-02 0.53823273E+02 + 0.45957317E+05 + + + + 5 1 +6.6372287e+07 4.09907500e+01 7.54677100e-03 1.50104500e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +7.0992522075e+01 7.0992522075e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.3949703176e+02 1.3949703176e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +1.1659851347e+01 -5.1077643128e+01 +4.3516059870e+01 6.8106719401e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +3.0482205563e+00 +2.8158917183e+01 -1.2900096167e+00 2.8352784524e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 503 -1.4708071904e+01 +2.2918725946e+01 -1.1073055994e+02 1.1403004991e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.40990751E+02 +0 + 1 21 0.10921927E-01 0.40990751E+02 + 1 21 0.21461082E-01 0.40990751E+02 + 0.13404950E+06 + + + + 5 1 +6.6372287e+07 3.36149800e+01 7.54677100e-03 1.56125500e-01 + 21 -1 0 0 502 503 +0.0000000000e+00 +0.0000000000e+00 +7.9720643936e+01 7.9720643936e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.1015809106e+02 1.1015809106e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 +2.0832938167e+01 +2.8798086026e+01 -8.7908167480e+01 9.4821869741e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 4 1 1 2 502 0 -3.2596791056e+01 +4.0076403627e+00 +4.9338106023e+01 5.9269390704e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -4 1 1 2 0 501 +1.1763852889e+01 -3.2805726389e+01 +8.1326143350e+00 3.5787474550e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.33614975E+02 +0 + 1 21 0.12264714E-01 0.33614975E+02 + 1 1 0.16947399E-01 0.33614975E+02 + 0.16891392E+05 + + + + 5 1 +6.6372287e+07 5.25551000e+01 7.54677100e-03 
1.43202800e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +4.5821819927e+02 4.5821819927e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -1.2383942127e+01 1.2383942127e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -5.0926951541e+01 -5.0815690566e+00 +5.0883321493e+01 7.2169863125e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 +2.4440122995e+01 +3.5331599133e+00 +1.4625618020e+02 1.4832623867e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 504 +2.6486828546e+01 +1.5484091433e+00 +2.4869475544e+02 2.5010603960e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.52555102E+02 +0 + 1 21 0.19052222E-02 0.52555102E+02 + 1 -1 0.70495095E-01 0.52555102E+02 + 0.23587364E+05 + + + + 5 1 +6.6372287e+07 4.46278200e+01 7.54677100e-03 1.47667900e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +5.5593250322e+02 5.5593250322e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.8836808285e+02 1.8836808285e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 504 +4.0713374762e+01 +1.7975906273e+01 +5.4492854609e+02 5.4674293085e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 -4.8243013951e+01 -3.8385710795e+01 -1.0265016479e+02 1.1974100183e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 501 +7.5296391891e+00 +2.0409804522e+01 -7.4713960932e+01 7.7816653392e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.44627825E+02 +0 + 1 21 0.85528077E-01 0.44627825E+02 + 1 21 0.28979705E-01 0.44627825E+02 + 0.15745256E+04 + + + + 5 1 +6.6372287e+07 3.37382900e+01 7.54677100e-03 1.56009900e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +1.5881756914e+03 1.5881756914e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -1.3626711614e+01 1.3626711614e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 -2.5839259073e+01 -9.7303516134e+00 
+1.6154599506e+02 1.6388854618e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 501 -1.7069497306e+01 +2.9008026152e+01 +1.1420766537e+01 3.5542470761e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 2 1 1 2 501 0 +4.2908756379e+01 -1.9277674538e+01 +1.4015822182e+03 1.4023713861e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.33738290E+02 +0 + 1 21 0.20964184E-02 0.33738290E+02 + 1 2 0.24433458E+00 0.33738290E+02 + 0.17276941E+05 + + + + 5 1 +6.6372287e+07 3.21182600e+01 7.54677100e-03 1.57578900e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +4.7751679366e+02 4.7751679366e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -2.2525410541e+01 2.2525410541e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -1.0544572028e+01 +2.8771952473e+01 +1.5170782840e+02 1.5477169782e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +2.6106310540e+01 +5.1888225486e+00 -1.0981232855e+01 2.8793242340e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 504 -1.5561738512e+01 -3.3960775022e+01 +3.1426478757e+02 3.1647726404e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.32118265E+02 +0 + 1 21 0.73464115E-01 0.32118265E+02 + 1 21 0.34654481E-02 0.32118265E+02 + 0.80826114E+05 + + + + 5 1 +6.6372287e+07 4.09504400e+01 7.54677100e-03 1.50133100e-01 + 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +1.7536299147e+02 1.7536299147e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.6964015600e+01 3.6964015600e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +4.0714726769e+00 +5.7850454719e+01 +1.0890596398e+00 5.8003776186e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 +4.7732900088e+00 -1.9654732976e+01 +1.3161726099e+01 2.4131387440e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 504 -8.8447626857e+00 -3.8195721743e+01 +1.2414819013e+02 1.3019184344e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 
+ + 3 0.40950439E+02 +0 + 1 21 0.26978921E-01 0.40950439E+02 + 1 21 0.56867717E-02 0.40950439E+02 + 0.26246574E+06 + + + + 5 1 +6.6372287e+07 2.73967700e+01 7.54677100e-03 1.62878500e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +9.4621731867e+02 9.4621731867e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -5.2344682715e+02 5.2344682715e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 +3.5097651064e+01 +5.1743549524e+00 +5.7624905467e+01 6.7670146960e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 -1.6769759782e+01 -2.1740470328e+01 +8.8306196865e+02 8.8348871717e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 505 502 -1.8327891283e+01 +1.6566115375e+01 -5.1791638260e+02 5.1850528169e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.27396771E+02 +0 + 1 21 0.14557190E+00 0.27396771E+02 + 1 21 0.80530281E-01 0.27396771E+02 + 0.67303146E+02 + + + + 5 1 +6.6372287e+07 8.17190300e+01 7.54677100e-03 1.32424300e-01 + 1 -1 0 0 502 0 +0.0000000000e+00 +0.0000000000e+00 +3.6186655864e+02 3.6186655864e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -8.5536214190e+01 8.5536214190e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +6.9748665717e+01 -2.1225395602e+01 +1.1519982450e+02 1.3633192345e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 501 0 +1.4237054539e+01 +2.7062837406e+01 +2.1101094416e+02 2.1321517170e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 1 1 1 2 502 0 -8.3985720256e+01 -5.8374418043e+00 -4.9880424207e+01 9.7855677673e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.81719032E+02 +0 + 1 1 0.55671777E-01 0.81719032E+02 + 1 1 0.13159418E-01 0.81719032E+02 + 0.34676915E+03 + + + + 5 1 +6.6372287e+07 3.21808800e+01 7.54677100e-03 1.57516200e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.6785736656e+03 1.6785736656e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 
+0.0000000000e+00 -0.0000000000e+00 -2.6055706089e+00 2.6055706089e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 501 -1.3779710329e+01 -3.5600343603e+01 +9.7923046116e+02 9.7997426545e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 +2.3134215327e+01 +6.9265799703e+00 +7.6205484473e+01 7.9940260774e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + -1 1 1 2 0 504 -9.3545049987e+00 +2.8673763633e+01 +6.2053214934e+02 6.2126470997e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.32180882E+02 +0 + 1 21 0.40085836E-03 0.32180882E+02 + 1 -1 0.25824124E+00 0.32180882E+02 + 0.94656396E+04 + + + + 5 1 +6.6372287e+07 2.72410900e+01 7.54677100e-03 1.63075300e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +4.0924620761e+01 4.0924620761e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.0688357873e+02 1.0688357873e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 502 +1.4260677542e+01 +2.3044763190e+01 +2.6464656953e+01 3.7878834487e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 -3.4418262802e+01 -1.1596463544e+01 -5.7672141314e+01 6.8155488882e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 +2.0157585259e+01 -1.1448299646e+01 -3.4751473609e+01 4.1773876122e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.27241089E+02 +0 + 1 21 0.62960956E-02 0.27241089E+02 + 1 21 0.16443627E-01 0.27241089E+02 + 0.51232088E+06 + + + + 5 1 +6.6372287e+07 8.96432400e+01 7.54677100e-03 1.30371800e-01 + 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.9106265508e+03 1.9106265508e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -8.3144018617e+00 8.3144018617e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 +2.0112949282e+01 -5.3085600688e+01 +9.4969449400e+02 9.5138963820e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 505 -4.7004645833e+00 -3.1846193563e+01 +6.9718679537e+02 
6.9792958244e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 502 -1.5412484699e+01 +8.4931794252e+01 +2.5543085956e+02 2.6962173201e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.89643239E+02 +0 + 1 21 0.29394220E+00 0.89643239E+02 + 1 21 0.12791403E-02 0.89643239E+02 + 0.10444343E+05 + + + + 5 1 +6.6372287e+07 3.51788400e+01 7.54677100e-03 1.54701800e-01 + 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +2.4477402762e+01 2.4477402762e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -4 -1 0 0 0 501 -0.0000000000e+00 -0.0000000000e+00 -1.3548028651e+02 1.3548028651e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 +5.2535145793e+00 -1.9356004202e+01 -7.2383362573e+01 7.5110621695e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 503 -1.7320229400e+01 -2.4467895105e+01 -1.3062058210e+01 3.2699932753e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + -4 1 1 2 0 502 +1.2066714821e+01 +4.3823899307e+01 -2.5557462961e+01 5.2147134821e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.35178836E+02 +0 + 1 21 0.37657543E-02 0.35178836E+02 + 1 -4 0.20843121E-01 0.35178836E+02 + 0.26617371E+05 + + + + 5 1 +6.6372287e+07 5.53124500e+01 7.54677100e-03 1.41862700e-01 + 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.4660412160e+02 2.4660412160e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -7.8448765078e+02 7.8448765078e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 505 -4.2504266063e+01 -2.5589506438e+01 -2.1846702105e+02 2.2402962920e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 +6.8994906150e+01 -2.2979517961e+01 -5.5770982325e+02 5.6243097556e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 505 501 -2.6490640086e+01 +4.8569024399e+01 +2.3829331512e+02 2.4463116762e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.55312446E+02 +0 + 1 21 0.37939096E-01 0.55312446E+02 + 1 21 0.12069041E+00 0.55312446E+02 + 0.42114463E+03 + + + + 5 1 +6.6372287e+07 
3.03757500e+01 7.54677100e-03 1.59397200e-01 + 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +1.2971462251e+03 1.2971462251e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -5.6448232432e+00 5.6448232432e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 503 504 -2.3055791365e+00 +2.0675048888e+01 +2.1686626098e+02 2.1786176464e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 504 502 -2.9371916466e+01 -8.2619650311e+00 +4.2957332697e+01 5.2690625120e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 2 1 1 2 501 0 +3.1677495603e+01 -1.2413083857e+01 +1.0316778082e+03 1.0322386586e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 + + 3 0.30375748E+02 +0 + 1 21 0.86843538E-03 0.30375748E+02 + 1 2 0.19956072E+00 0.30375748E+02 + 0.91540288E+05 + + + + 5 1 +6.6372287e+07 4.60134200e+01 7.54677100e-03 1.46811200e-01 + -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.7829784629e+01 1.7829784629e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -2.8514522110e+02 2.8514522110e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + 21 1 1 2 503 501 -3.0860485485e+01 +3.4588417908e+01 -6.7479109992e+01 8.1866711813e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + 21 1 1 2 504 502 +2.6117598747e+01 +1.5550519447e+01 -1.3824680405e+02 1.4154902490e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 + -1 1 1 2 0 504 +4.7428867379e+00 -5.0138937355e+01 -6.1589522426e+01 7.9559269015e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 + + 3 0.46013420E+02 +0 + 1 21 0.43868495E-01 0.46013420E+02 + 1 -1 0.27430438E-02 0.46013420E+02 + 0.16895399E+05 + + +
From f755c17011dbce2b03a882fdbdac0307be0369d1 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Wed, 13 Mar 2024 15:35:04 +0100 Subject: [PATCH 15/76] added proper makefiles for rwgt_runners and rwgt_driver --- .../template_files/gpu/cudacpp_rex_driver.mk | 1049 +++++++++++++++++ .../{cudacpp_rex.mk => cudacpp_rex_runner.mk} | 15 +- .../CUDACPP_SA_OUTPUT/model_handling.py | 19 +- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 24 +- tools/REX/rwgt_driver.cc | 2 +- tools/REX/rwgt_instance.h | 2 + tools/REX/rwgt_runner.cc | 4 +- tools/REX/teawREX.hpp | 4 - 8 files changed, 1099 insertions(+), 20 deletions(-) create mode 100644 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk rename epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/{cudacpp_rex.mk => cudacpp_rex_runner.mk} (98%) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk new file mode 100644 index 0000000000..3a8c3e3e98 --- /dev/null +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk @@ -0,0 +1,1049 @@ +# Copyright (C) 2020-2023 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. 
+ +#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') + +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk + +#------------------------------------------------------------------------------- + +#=== Use bash in the Makefile (https://www.gnu.org/software/make/manual/html_node/Choosing-the-Shell.html) + +SHELL := /bin/bash + +#------------------------------------------------------------------------------- + +#=== Detect O/S and architecture (assuming uname is available, https://en.wikipedia.org/wiki/Uname) + +# Detect O/S kernel (Linux, Darwin...) +UNAME_S := $(shell uname -s) +###$(info UNAME_S='$(UNAME_S)') + +# Detect architecture (x86_64, ppc64le...) +UNAME_P := $(shell uname -p) +###$(info UNAME_P='$(UNAME_P)') + +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: this is crucial for MG5aMC flag consistency/documentation +# AV: temporarely comment this out because it breaks cudacpp builds +ifneq ($(wildcard ../Source/make_opts),) +include ../Source/make_opts +endif + +#------------------------------------------------------------------------------- + +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. 
+OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +MG5AMC_COMMONLIB = mg5amc_common +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) +override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) +override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) +override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else +override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) +export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../test +TESTDIRLOCAL = ../test +ifneq ($(wildcard $(GTEST_ROOT)),) +TESTDIR = +else ifneq ($(LOCALGTEST),) +TESTDIR=$(TESTDIRLOCAL) +GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../epochX/cudacpp/CODEGEN),) +TESTDIR = $(TESTDIRCOMMON) +GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else +TESTDIR = +endif +ifneq ($(GTEST_ROOT),) +GTESTLIBDIR = $(GTEST_ROOT)/lib64/ +GTESTLIBS = $(GTESTLIBDIR)/libgtest.a $(GTESTLIBDIR)/libgtest_main.a +GTESTINC = -I$(GTEST_ROOT)/include +else +GTESTLIBDIR = +GTESTLIBS = +GTESTINC 
= +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + +#=== Configure the C++ compiler + +CXXFLAGS = $(OPTFLAGS) -std=c++17 $(INCFLAGS) -Wall -Wshadow -Wextra +ifeq ($(shell $(CXX) --version | grep ^nvc++),) +CXXFLAGS += -ffast-math # see issue #117 +endif +###CXXFLAGS+= -Ofast # performance is not different from --fast-math +###CXXFLAGS+= -g # FOR DEBUGGING ONLY + +# Optionally add debug flags to display the full list of flags (eg on Darwin) +###CXXFLAGS+= -v + +# Note: AR, CXX and FC are implicitly defined if not set externally +# See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html + +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +CXXFLAGS += -mmacosx-version-min=11.3 +endif + +#------------------------------------------------------------------------------- + +#=== Configure the GPU compiler (CUDA or HIP) + +# FIXME! (AV 24.01.2024) +# In the current implementation (without separate builds for C++ and CUDA/HIP), we first check for cudacc and hipcc in CUDA_HOME and HIP_HOME. +# If CUDA_HOME or HIP_HOME are not set, try to determine them from the path to cudacc and hipcc. +# While convoluted, this is currently necessary to allow disabling CUDA/HIP builds by setting CUDA_HOME or HIP_HOME to invalid paths. +# This will (probably?) be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775). 
+ +# If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA builds (issue #505) +# This is because it is impossible to pass this to "GPUFLAGS += -ccbin " below +ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache " from outside + $(warning CUDA builds are not supported for multi-word CXX "$(CXX)") + override CUDA_HOME=disabled +endif + +# If CUDA_HOME is not set, try to set it from the path to nvcc +ifndef CUDA_HOME + CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null)) + $(warning CUDA_HOME was not set: using "$(CUDA_HOME)") +endif + +# If HIP_HOME is not set, try to set it from the path to hipcc +ifndef HIP_HOME + HIP_HOME = $(patsubst %%/bin/hipcc,%%,$(shell which hipcc 2>/dev/null)) + $(warning HIP_HOME was not set: using "$(HIP_HOME)") +endif + +# FIXME! (AV 24.01.2024) +# In the current implementation (without separate builds for C++ and CUDA/HIP), +# builds are performed for HIP only if CUDA is not found in the path. +# If both CUDA and HIP are installed, HIP builds can be triggered by unsetting CUDA_HOME. +# This will be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775). + +#--- Option 1: CUDA exists -> use CUDA + +# Set GPUCC as $(CUDA_HOME)/bin/nvcc if it exists +ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) + + GPUCC = $(CUDA_HOME)/bin/nvcc + USE_NVTX ?=-DUSE_NVTX + # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html + # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). + # Embed device code for 70, and PTX for 70+. + # Export MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to use another value or list of values (see #533). + # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). 
+ MADGRAPH_CUDA_ARCHITECTURE ?= 70 + ###CUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 + ###CUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 + comma:=, + CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + CUINC = -I$(CUDA_HOME)/include/ + CUOPTFLAGS = -lineinfo + ###GPUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math + GPUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math + ###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow + ###GPUCC_VERSION = $(shell $(GPUCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1) + GPUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h + # Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 96 # degrades throughput: 4.1E8 (16384 32 12) up to 4.5E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 64 # degrades throughput: 1.7E8 (16384 32 12) flat at 1.7E8 (65536 128 12) + CUBUILDRULEFLAGS = -Xcompiler -fPIC -c + CCBUILDRULEFLAGS = -Xcompiler -fPIC -c -x cu + CUDATESTFLAGS = -lcuda + + # Set the host C++ compiler for GPUCC via 
"-ccbin " + # (NB issue #505: this must be a single word, "clang++ --gcc-toolchain..." is not supported) + GPUFLAGS += -ccbin $(shell which $(subst ccache ,,$(CXX))) + + # Allow newer (unsupported) C++ compilers with older versions of CUDA if ALLOW_UNSUPPORTED_COMPILER_IN_CUDA is set (#504) + ifneq ($(origin ALLOW_UNSUPPORTED_COMPILER_IN_CUDA),undefined) + GPUFLAGS += -allow-unsupported-compiler + endif + +else ifneq ($(origin REQUIRE_CUDA),undefined) + + # If REQUIRE_CUDA is set but no cuda is found, stop here (e.g. for CI tests on GPU #443) + $(error No cuda installation found (set CUDA_HOME or make GPUCC visible in PATH)) + +#--- Option 2: CUDA does not exist, HIP exists -> use HIP + +# Set GPUCC as $(HIP_HOME)/bin/hipcc if it exists +else ifneq ($(wildcard $(HIP_HOME)/bin/hipcc),) + + GPUCC = $(HIP_HOME)/bin/hipcc + #USE_NVTX ?=-DUSE_NVTX # should maybe find something equivalent to this in HIP? + HIPARCHFLAGS = -target x86_64-linux-gnu --offload-arch=gfx90a + HIPINC = -I$(HIP_HOME)/include/ + # Note: -DHIP_FAST_MATH is equivalent to -use_fast_math in HIP + # (but only for single precision line 208: https://rocm-developer-tools.github.io/HIP/hcc__detail_2math__functions_8h_source.html) + # Note: CUOPTFLAGS should not be used for HIP, it had been added here but was then removed (#808) + GPUFLAGS = $(OPTFLAGS) $(INCFLAGS) $(HIPINC) $(HIPARCHFLAGS) -DHIP_FAST_MATH -DHIP_PLATFORM=amd -fPIC + ###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow + GPUFLAGS += -std=c++17 + ###GPUFLAGS+= --maxrregcount 255 # (AV: is this option valid on HIP and meaningful on AMD GPUs?) + CUBUILDRULEFLAGS = -fPIC -c + CCBUILDRULEFLAGS = -fPIC -c -x hip + +else ifneq ($(origin REQUIRE_HIP),undefined) + + # If REQUIRE_HIP is set but no HIP is found, stop here (e.g. 
for CI tests on GPU #443) + $(error No hip installation found (set HIP_HOME or make GPUCC visible in PATH)) + +#--- Option 3: CUDA does not exist, HIP does not exist -> switch off both CUDA and HIP + +else + + # No cudacc and no hipcc: switch CUDA and HIP compilation off and go to common random numbers in C++ + $(warning CUDA_HOME is not set or is invalid: export CUDA_HOME to compile with cuda) + $(warning HIP_HOME is not set or is invalid: export HIP_HOME to compile with hip) + override GPUCC= + override USE_NVTX= + override CUINC= + override HIPINC= + +endif + +# Export GPUCC (so that it can also be used in cudacpp_src.mk?) +export GPUCC +export GPUFLAGS + +#------------------------------------------------------------------------------- + +#=== Configure ccache for C++ and CUDA/HIP builds + +# Enable ccache if USECCACHE=1 +ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) + override CXX:=ccache $(CXX) +endif +#ifeq ($(USECCACHE)$(shell echo $(AR) | grep ccache),1) +# override AR:=ccache $(AR) +#endif +ifneq ($(GPUCC),) + ifeq ($(USECCACHE)$(shell echo $(GPUCC) | grep ccache),1) + override GPUCC:=ccache $(GPUCC) + endif +endif + +#------------------------------------------------------------------------------- + +#=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP + +# PowerPC-specific CXX compiler flags (being reviewed) +ifeq ($(UNAME_P),ppc64le) + CXXFLAGS+= -mcpu=power9 -mtune=power9 # gains ~2-3%% both for none and sse4 + # Throughput references without the extra flags below: none=1.41-1.42E6, sse4=2.15-2.19E6 + ###CXXFLAGS+= -DNO_WARN_X86_INTRINSICS # no change + ###CXXFLAGS+= -fpeel-loops # no change + ###CXXFLAGS+= -funroll-loops # gains ~1%% for none, loses ~1%% for sse4 + ###CXXFLAGS+= -ftree-vectorize # no change + ###CXXFLAGS+= -flto # would increase to none=4.08-4.12E6, sse4=4.99-5.03E6! +else + ###CXXFLAGS+= -flto # also on Intel this would increase throughputs by a factor 2 to 4... 
+ ######CXXFLAGS+= -fno-semantic-interposition # no benefit (neither alone, nor combined with -flto) +endif + +# PowerPC-specific CUDA/HIP compiler flags (to be reviewed!) +ifeq ($(UNAME_P),ppc64le) + GPUFLAGS+= -Xcompiler -mno-float128 +endif + +#------------------------------------------------------------------------------- + +#=== Configure defaults and check if user-defined choices exist for OMPFLAGS, AVX, FPTYPE, HELINL, HRDCOD + +# Set the default OMPFLAGS choice +ifneq ($(findstring hipcc,$(GPUCC)),) +override OMPFLAGS = # disable OpenMP MT when using hipcc #802 +else ifneq ($(shell $(CXX) --version | egrep '^Intel'),) +override OMPFLAGS = -fopenmp +###override OMPFLAGS = # disable OpenMP MT on Intel (was ok without GPUCC but not ok with GPUCC before #578) +else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) +override OMPFLAGS = -fopenmp +###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) +override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) +###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) +else +override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms +###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) +endif + +# Set the default AVX (vectorization) choice +ifeq ($(AVX),) + ifeq ($(UNAME_P),ppc64le) + ###override AVX = none + override AVX = sse4 + else ifeq ($(UNAME_P),arm) + ###override AVX = none + override AVX = sse4 + else ifeq ($(wildcard /proc/cpuinfo),) + override AVX = none + $(warning Using AVX='$(AVX)' because host SIMD features cannot be read from /proc/cpuinfo) + else ifeq ($(shell grep -m1 -c avx512vl /proc/cpuinfo)$(shell $(CXX) --version | grep ^clang),1) + override AVX = 512y + ###$(info Using AVX='$(AVX)' as no user input exists) + else + override AVX = avx2 + ifneq ($(shell grep -m1 -c avx512vl /proc/cpuinfo),1) + $(warning Using AVX='$(AVX)' because host does not support avx512vl) + else + $(warning Using AVX='$(AVX)' because this is faster than avx512vl for clang) + endif + endif +else + ###$(info Using AVX='$(AVX)' according to user input) +endif + +# Set the default FPTYPE (floating point type) choice +ifeq ($(FPTYPE),) + override FPTYPE = d +endif + +# Set the default HELINL (inline helicities?) choice +ifeq ($(HELINL),) + override HELINL = 0 +endif + +# Set the default HRDCOD (hardcode cIPD physics parameters?) 
choice +ifeq ($(HRDCOD),) + override HRDCOD = 0 +endif + +# Export AVX, FPTYPE, HELINL, HRDCOD, OMPFLAGS so that it is not necessary to pass them to the src Makefile too +export AVX +export FPTYPE +export HELINL +export HRDCOD +export OMPFLAGS + +#------------------------------------------------------------------------------- + +#=== Configure defaults and check if user-defined choices exist for RNDGEN (legacy!), HASCURAND, HASHIPRAND + +# If the legacy RNDGEN exists, this take precedence over any HASCURAND choice (but a warning is printed out) +###$(info RNDGEN=$(RNDGEN)) +ifneq ($(RNDGEN),) + $(warning Environment variable RNDGEN is no longer supported, please use HASCURAND instead!) + ifeq ($(RNDGEN),hasCurand) + override HASCURAND = $(RNDGEN) + else ifeq ($(RNDGEN),hasNoCurand) + override HASCURAND = $(RNDGEN) + else ifneq ($(RNDGEN),hasNoCurand) + $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported - but use HASCURAND instead!) + endif +endif + +# Set the default HASCURAND (curand random number generator) choice, if no prior choice exists for HASCURAND +# (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) +ifeq ($(HASCURAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASCURAND = hasNoCurand + else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + override HASCURAND = hasCurand + else # non-Nvidia GPU build + override HASCURAND = hasNoCurand + endif +endif + +# Set the default HASHIPRAND (hiprand random number generator) choice, if no prior choice exists for HASHIPRAND +# (NB: allow HASHIPRAND=hasHiprand even if $(GPUCC) does not point to hipcc: assume HIP_HOME was defined correctly...) 
+ifeq ($(HASHIPRAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASHIPRAND = hasNoHiprand + else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build + override HASHIPRAND = hasHiprand + else # non-AMD GPU build + override HASHIPRAND = hasNoHiprand + endif +endif + +# Export HASCURAND, HASHIPRAND so that it is not necessary to pass them to the src Makefile too +# (NB: these variables in cudacpp_src.mk are only used to define the build tag, they are NOT needed for RNDCXXFLAGS or RNDLIBFLAGS) +export HASCURAND +export HASHIPRAND + +#------------------------------------------------------------------------------- + +#=== Set the CUDA/HIP/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD + +# Set the build flags appropriate to OMPFLAGS +$(info OMPFLAGS=$(OMPFLAGS)) +CXXFLAGS += $(OMPFLAGS) + +# Set the build flags appropriate to each AVX choice (example: "make AVX=none") +# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro] +# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] +$(info AVX=$(AVX)) +ifeq ($(UNAME_P),ppc64le) + ifeq ($(AVX),sse4) + override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers) + else ifneq ($(AVX),none) + $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on PowerPC for the moment) + endif +else ifeq ($(UNAME_P),arm) + ifeq ($(AVX),sse4) + override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) + else ifneq ($(AVX),none) + $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on ARM for the moment) + endif +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 + ifeq ($(AVX),none) + override AVXFLAGS = -mno-sse3 # no SIMD + else ifeq ($(AVX),sse4) + override AVXFLAGS = -mno-avx # SSE4.2 with 128 width (xmm registers) + else ifeq ($(AVX),avx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq 
($(AVX),512y) + override AVXFLAGS = -march=skylake -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(AVX),512z) + override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + else + $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) + endif +else + ifeq ($(AVX),none) + override AVXFLAGS = -march=x86-64 # no SIMD (see #588) + else ifeq ($(AVX),sse4) + override AVXFLAGS = -march=nehalem # SSE4.2 with 128 width (xmm registers) + else ifeq ($(AVX),avx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq ($(AVX),512y) + override AVXFLAGS = -march=skylake-avx512 -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(AVX),512z) + override AVXFLAGS = -march=skylake-avx512 -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + else + $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) + endif +endif +# For the moment, use AVXFLAGS everywhere: eventually, use them only in encapsulated implementations? 
+CXXFLAGS+= $(AVXFLAGS) + +# Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") +$(info FPTYPE=$(FPTYPE)) +ifeq ($(FPTYPE),d) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE +else ifeq ($(FPTYPE),f) + CXXFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT +else ifeq ($(FPTYPE),m) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT +else + $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported) +endif + +# Set the build flags appropriate to each HELINL choice (example: "make HELINL=1") +$(info HELINL=$(HELINL)) +ifeq ($(HELINL),1) + CXXFLAGS += -DMGONGPU_INLINE_HELAMPS + GPUFLAGS += -DMGONGPU_INLINE_HELAMPS +else ifneq ($(HELINL),0) + $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported) +endif + +# Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1") +$(info HRDCOD=$(HRDCOD)) +ifeq ($(HRDCOD),1) + CXXFLAGS += -DMGONGPU_HARDCODE_PARAM + GPUFLAGS += -DMGONGPU_HARDCODE_PARAM +else ifneq ($(HRDCOD),0) + $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) +endif + + +#=== Set the CUDA/HIP/C++ compiler and linker flags appropriate to user-defined choices of HASCURAND, HASHIPRAND + +$(info HASCURAND=$(HASCURAND)) +$(info HASHIPRAND=$(HASHIPRAND)) +override RNDCXXFLAGS= +override RNDLIBFLAGS= + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASCURAND choice (example: "make HASCURAND=hasNoCurand") +ifeq ($(HASCURAND),hasNoCurand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_CURAND +else ifeq ($(HASCURAND),hasCurand) + override RNDLIBFLAGS += -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+else + $(error Unknown HASCURAND='$(HASCURAND)': only 'hasCurand' and 'hasNoCurand' are supported) +endif + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASHIPRAND choice (example: "make HASHIPRAND=hasNoHiprand") +ifeq ($(HASHIPRAND),hasNoHiprand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_HIPRAND +else ifeq ($(HASHIPRAND),hasHiprand) + override RNDLIBFLAGS += -L$(HIP_HOME)/lib/ -lhiprand +else ifneq ($(HASHIPRAND),hasHiprand) + $(error Unknown HASHIPRAND='$(HASHIPRAND)': only 'hasHiprand' and 'hasNoHiprand' are supported) +endif + +#$(info RNDCXXFLAGS=$(RNDCXXFLAGS)) +#$(info HASHIPRAND=$(HASHIPRAND)) + +#------------------------------------------------------------------------------- + +#=== Configure build directories and build lockfiles === + +# Build directory "short" tag (defines target and path to the optional build directory) +# (Rationale: keep directory names shorter, e.g. do not include random number generator choice) +override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) + +# Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) +# (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) +override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASHIPRAND) + +# Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 +ifeq ($(USEBUILDDIR),1) + override BUILDDIR = build.$(DIRTAG) + override LIBDIR = ../lib/$(BUILDDIR) + override LIBDIRRPATH = '$$ORIGIN/../$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is set = 1)) +else + override BUILDDIR = . 
+ override LIBDIR = ../lib + override LIBDIRRPATH = '$$ORIGIN/$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is not set)) +endif +###override INCDIR = ../../include +###$(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG)) + +# On Linux, set rpath to LIBDIR to make it unnecessary to use LD_LIBRARY_PATH +# Use relative paths with respect to the executables or shared libraries ($ORIGIN on Linux) +# On Darwin, building libraries with absolute paths in LIBDIR makes this unnecessary +ifeq ($(UNAME_S),Darwin) + override CXXLIBFLAGSRPATH = + override CULIBFLAGSRPATH = + override CXXLIBFLAGSRPATH2 = + override CULIBFLAGSRPATH2 = +else + # RPATH to cuda/cpp libs when linking executables + override CXXLIBFLAGSRPATH = -Wl,-rpath=$(LIBDIRRPATH) + override CULIBFLAGSRPATH = -Xlinker -rpath=$(LIBDIRRPATH) + # RPATH to common lib when linking cuda/cpp libs + override CXXLIBFLAGSRPATH2 = -Wl,-rpath='$$ORIGIN' + override CULIBFLAGSRPATH2 = -Xlinker -rpath='$$ORIGIN' +endif + +# Setting LD_LIBRARY_PATH or DYLD_LIBRARY_PATH in the RUNTIME is no longer necessary (neither on Linux nor on Mac) +override RUNTIME = + +#=============================================================================== +#=== Makefile TARGETS and build rules below +#=============================================================================== + + +.PHONY: all $(DIRS) + +# Assuming DIRS is defined as before +DIRS := $(wildcard P*) + +# Construct the library paths +rwgtlib := $(addprefix ,$(addsuffix /librwgt.a,$(DIRS))) + +cxx_rwgt=$(BUILDDIR)/rwgt.exe +ifneq ($(GPUCC),) +cu_rwgt=$(BUILDDIR)/grwgt.exe +grwgtlib := $(addprefix $(DIRS)/,libgrwgt.a) +else +cu_rwgt= +grwgtlib= +endif +ifneq ($(GTESTLIBS),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_rwgt) $(cxx_rwgt) +else +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_rwgt) $(cxx_rwgt) +endif + +# Target (and build options): debug +MAKEDEBUG= +debug: OPTFLAGS 
= -g -O0 +debug: CUOPTFLAGS = -G +debug: MAKEDEBUG := debug +debug: all.$(TAG) + +# Target: tag-specific build lockfiles +override oldtagsb=`if [ -d $(BUILDDIR) ]; then find $(BUILDDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)' -exec echo $(shell pwd)/{} \; ; fi` +$(BUILDDIR)/.build.$(TAG): + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi + @touch $(BUILDDIR)/.build.$(TAG) + +# Generic target and build rules: objects from CUDA or HIP compilation +# NB: CCBUILDRULEFLAGS includes "-x cu" for nvcc and "-x hip" for hipcc (#810) +ifneq ($(GPUCC),) +$(BUILDDIR)/%%.o : %%.cu *.h ../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(GPUCC) $(CPPFLAGS) $(GPUFLAGS) $(CUBUILDRULEFLAGS) $< -o $@ + +$(BUILDDIR)/%%_cu.o : %%.cc *.h ../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(GPUCC) $(CPPFLAGS) $(GPUFLAGS) $(CCBUILDRULEFLAGS) $< -o $@ +endif + +# Generic target and build rules: objects from C++ compilation +# (NB do not include CUINC here! add it only for NVTX or curand #679) +$(BUILDDIR)/%%.o : %%.cc *.h ../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(CXX) $(CPPFLAGS) $(CXXFLAGS) -fPIC -c $< -o $@ + +# Apply special build flags only to CrossSectionKernel[_cu].o (no fast math, see #117 and #516) +# Added edgecase for HIP compilation +ifeq ($(shell $(CXX) --version | grep ^nvc++),) +$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math +ifeq ($(findstring nvcc,$(GPUCC)),nvcc) + $(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -Xcompiler -fno-fast-math +else + $(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -fno-fast-math +endif +endif + +# # Apply special build flags only to check_sa[_cu].o (NVTX in timermap.h, #679) +# $(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) +# $(BUILDDIR)/check_sa_cu.o: CXXFLAGS += $(USE_NVTX) $(CUINC) + +# # Apply special build flags only to check_sa[_cu].o and (Cu|Hip)randRandomNumberKernel[_cu].o +# $(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/check_sa_cu.o: CUFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/CurandRandomNumberKernel_cu.o: CUFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/HiprandRandomNumberKernel_cu.o: CUFLAGS += $(RNDCXXFLAGS) +# ifeq ($(HASCURAND),hasCurand) # curand headers, #679 +# $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) +# endif +# ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers +# $(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIPINC) +# endif + +# Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... 
instead" in GPUCC with icx2023 (#592) +ifneq ($(shell $(CXX) --version | egrep '^(Intel)'),) +ifneq ($(GPUCC),) +GPUFLAGS += -Wno-deprecated-builtins +endif +endif + +# Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) +# This patch does remove the warning, but I prefer to keep it disabled for the moment... +###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) +###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option +###ifneq ($(GPUCC),) +###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -Xcompiler -Wno-overriding-t-option +###endif +###endif + +#### Apply special build flags only to CPPProcess.o (-flto) +###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto + +#### Apply special build flags only to CPPProcess.o (AVXFLAGS) +###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) + +#------------------------------------------------------------------------------- + +# Target (and build rules): common (src) library +commonlib : $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so + +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../src/*.h ../src/*.cc $(BUILDDIR)/.build.$(TAG) + $(MAKE) -C ../src $(MAKEDEBUG) -f $(CUDACPP_SRC_MAKEFILE) + +#------------------------------------------------------------------------------- + +#processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +#MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp +#cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o +#cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o + +#ifneq ($(GPUCC),) +#MG5AMC_CULIB = mg5amc_$(processid_short)_cuda +#cu_objects_lib=$(BUILDDIR)/CPPProcess_cu.o $(BUILDDIR)/MatrixElementKernels_cu.o $(BUILDDIR)/BridgeKernels_cu.o $(BUILDDIR)/CrossSectionKernels_cu.o +#cu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cu.o 
$(BUILDDIR)/RamboSamplingKernels_cu.o +#endif + +# Target (and build rules): C++ and CUDA shared libraries +#$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o +#$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o +#$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so +# $(CXX) -shared -o $@ $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) + +ifneq ($(GPUCC),) +#$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o +#$(LIBDIR)/lib$(MG5AMC_CULIB).so: cu_objects_lib += $(BUILDDIR)/fbridge_cu.o +$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so + $(GPUCC) --shared -o $@ $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# Bypass std::filesystem completely to ease portability on LUMI #803 +ifneq ($(findstring hipcc,$(GPUCC)),) + $(GPUCC) --shared -o $@ $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs +else + $(GPUCC) --shared -o $@ $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +endif +endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): Fortran include files +###$(INCDIR)/%%.inc : ../%%.inc +### @if [ ! 
-d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi +### \cp $< $@ + +#------------------------------------------------------------------------------- + +#HERE LOOP MAKE OVER P DIRECTORIES AND ADD RWGT_RUNNER_LIBS +# Ensure each librwgt.a depends on its directory being built +$(rwgtlib): + @$(MAKE) -C $(@D) VARIABLE=true + +# Target (and build rules): C++ and CUDA standalone executables +$(cxx_rwgt): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_rwgt): $(BUILDDIR)/rwgt_driver.o $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(rwgtlib) + $(CXX) -o $@ $(BUILDDIR)/rwgt_driver.o $(rwgtlib) $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) + +ifneq ($(GPUCC),) +ifneq ($(shell $(CXX) --version | grep ^Intel),) +$(cu_rwgt): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(cu_rwgt): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 +$(cu_rwgt): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +endif +$(cu_rwgt): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cu_rwgt): rwgtlibs $(BUILDDIR)/grwgt.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(DIRS) + $(GPUCC) -o $@ $(BUILDDIR)/grwgt.o $(grwgtlib) $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) +endif + +#------------------------------------------------------------------------------- + +# Generic target and build rules: objects from Fortran compilation +#$(BUILDDIR)/%%.o : %%.f *.inc +# @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi +# $(FC) -I. -c $< -o $@ + +# Generic target and build rules: objects from Fortran compilation +###$(BUILDDIR)/%%.o : %%.f *.inc +### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi +### @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi +### $(FC) -I. -I$(INCDIR) -c $< -o $@ + +# Target (and build rules): Fortran standalone executables +###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc + +#ifeq ($(UNAME_S),Darwin) +#$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +#endif +#$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +#$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +#ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 +# $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ +#else +# $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) +#endif + +# ifneq ($(GPUCC),) +# ifneq ($(shell $(CXX) --version | grep ^Intel),) +# $(fcu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +# $(fcu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +# endif +# ifeq ($(UNAME_S),Darwin) +# $(fcu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +# endif +# $(fcu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +# $(fcu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) +# ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 +# $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) 
-print-prog-name=clang))/../../lib -lamdhip64 +# else +# $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) +# endif +# endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): test objects and test executable +# $(BUILDDIR)/testxxx.o: $(GTESTLIBS) +# $(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) +# $(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt +# $(testmain): $(BUILDDIR)/testxxx.o +# $(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions + +# ifneq ($(GPUCC),) +# $(BUILDDIR)/testxxx_cu.o: $(GTESTLIBS) +# $(BUILDDIR)/testxxx_cu.o: INCFLAGS += $(GTESTINC) +# $(BUILDDIR)/testxxx_cu.o: testxxx_cc_ref.txt +# $(testmain): $(BUILDDIR)/testxxx_cu.o +# $(testmain): cu_objects_exe += $(BUILDDIR)/testxxx_cu.o # Comment out this line to skip the CUDA test of xxx functions +# endif + +# $(BUILDDIR)/testmisc.o: $(GTESTLIBS) +# $(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) +# $(testmain): $(BUILDDIR)/testmisc.o +# $(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests + +# ifneq ($(GPUCC),) +# $(BUILDDIR)/testmisc_cu.o: $(GTESTLIBS) +# $(BUILDDIR)/testmisc_cu.o: INCFLAGS += $(GTESTINC) +# $(testmain): $(BUILDDIR)/testmisc_cu.o +# $(testmain): cu_objects_exe += $(BUILDDIR)/testmisc_cu.o # Comment out this line to skip the CUDA miscellaneous tests +# endif + +# $(BUILDDIR)/runTest.o: $(GTESTLIBS) +# $(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) +# $(testmain): $(BUILDDIR)/runTest.o +# $(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o + +# ifneq ($(GPUCC),) +# $(BUILDDIR)/runTest_cu.o: $(GTESTLIBS) +# $(BUILDDIR)/runTest_cu.o: INCFLAGS += $(GTESTINC) +# ifneq ($(shell $(CXX) --version | grep ^Intel),) +# $(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to 
`_intel_fast_memcpy') +# $(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +# else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 +# $(testmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +# endif +# $(testmain): $(BUILDDIR)/runTest_cu.o +# $(testmain): cu_objects_exe += $(BUILDDIR)/runTest_cu.o +# endif + +# $(testmain): $(GTESTLIBS) +# $(testmain): INCFLAGS += $(GTESTINC) +# $(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -lgtest_main + +# ifneq ($(OMPFLAGS),) +# ifneq ($(shell $(CXX) --version | egrep '^Intel'),) +# $(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +# else ifneq ($(shell $(CXX) --version | egrep '^clang'),) +# $(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +# ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +# ###$(testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +# else +# $(testmain): LIBFLAGS += -lgomp +# endif +# endif + +# # Bypass std::filesystem completely to ease portability on LUMI #803 +# #ifneq ($(findstring hipcc,$(GPUCC)),) +# #$(testmain): LIBFLAGS += -lstdc++fs +# #endif + +# ifeq ($(GPUCC),) # link only runTest.o +# $(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +# $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +# $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) +# else # link both runTest.o and runTest_cu.o +# $(testmain): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +# $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) $(GTESTLIBS) +# ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 +# $(FC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) $(CUDATESTFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 +# else +# $(GPUCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) $(CUDATESTFLAGS) +# endif +# endif + +# # Use target gtestlibs to build only googletest +# ifneq ($(GTESTLIBS),) +# gtestlibs: $(GTESTLIBS) +# endif + +# # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 +# $(GTESTLIBS): +# ifneq ($(shell which flock 2>/dev/null),) +# @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi +# flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) +# else +# if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi +# endif + +#------------------------------------------------------------------------------- + +# Target: build all targets in all AVX modes (each AVX mode in a separate build directory) +# Split the avxall target into five separate targets to allow parallel 'make -j avxall' builds +# (Hack: add a fbridge.inc dependency to avxall, to ensure it is only copied once for all AVX modes) +avxnone: + @echo + $(MAKE) USEBUILDDIR=1 AVX=none -f $(CUDACPP_MAKEFILE) + +avxsse4: + @echo + $(MAKE) USEBUILDDIR=1 AVX=sse4 -f $(CUDACPP_MAKEFILE) + +avxavx2: + @echo + $(MAKE) USEBUILDDIR=1 AVX=avx2 -f $(CUDACPP_MAKEFILE) + +avx512y: + @echo + $(MAKE) USEBUILDDIR=1 AVX=512y -f $(CUDACPP_MAKEFILE) + +avx512z: + @echo + $(MAKE) USEBUILDDIR=1 AVX=512z -f $(CUDACPP_MAKEFILE) + +ifeq ($(UNAME_P),ppc64le) +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 +avxall: avxnone avxsse4 +else ifeq ($(UNAME_P),arm) +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 +avxall: avxnone avxsse4 +else +###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 avxavx2 avx512y avx512z +avxall: avxnone avxsse4 avxavx2 avx512y avx512z +endif + +#------------------------------------------------------------------------------- + +# Target: clean the builds +.PHONY: clean + +clean: +ifeq ($(USEBUILDDIR),1) + rm -rf $(BUILDDIR) +else + rm -f $(BUILDDIR)/.build.* $(BUILDDIR)/*.o $(BUILDDIR)/*.exe + rm -f $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(LIBDIR)/lib$(MG5AMC_CULIB).so +endif + $(MAKE) -C ../src clean -f $(CUDACPP_SRC_MAKEFILE) +### rm -rf $(INCDIR) + +cleanall: + @echo + $(MAKE) USEBUILDDIR=0 clean -f $(CUDACPP_MAKEFILE) + @echo + $(MAKE) USEBUILDDIR=0 -C ../src cleanall -f $(CUDACPP_SRC_MAKEFILE) + rm -rf build.* + +# Target: clean the builds as well as the gtest installation(s) +distclean: cleanall +ifneq ($(wildcard 
$(TESTDIRCOMMON)),) + $(MAKE) -C $(TESTDIRCOMMON) clean +endif + $(MAKE) -C $(TESTDIRLOCAL) clean + +#------------------------------------------------------------------------------- + +# Target: show system and compiler information +info: + @echo "" + @uname -spn # e.g. Linux nodename.cern.ch x86_64 +ifeq ($(UNAME_S),Darwin) + @sysctl -a | grep -i brand + @sysctl -a | grep machdep.cpu | grep features || true + @sysctl -a | grep hw.physicalcpu: + @sysctl -a | grep hw.logicalcpu: +else + @cat /proc/cpuinfo | grep "model name" | sort -u + @cat /proc/cpuinfo | grep "flags" | sort -u + @cat /proc/cpuinfo | grep "cpu cores" | sort -u + @cat /proc/cpuinfo | grep "physical id" | sort -u +endif + @echo "" +ifneq ($(shell which nvidia-smi 2>/dev/null),) + nvidia-smi -L + @echo "" +endif + @echo USECCACHE=$(USECCACHE) +ifeq ($(USECCACHE),1) + ccache --version | head -1 +endif + @echo "" + @echo GPUCC=$(GPUCC) +ifneq ($(GPUCC),) + $(GPUCC) --version +endif + @echo "" + @echo CXX=$(CXX) +ifneq ($(shell $(CXX) --version | grep ^clang),) + @echo $(CXX) -v + @$(CXX) -v |& egrep -v '(Found|multilib)' + @readelf -p .comment `$(CXX) -print-libgcc-file-name` |& grep 'GCC: (GNU)' | grep -v Warning | sort -u | awk '{print "GCC toolchain:",$$5}' +else + $(CXX) --version +endif + @echo "" + @echo FC=$(FC) + $(FC) --version + +#------------------------------------------------------------------------------- + +# Target: check (run the C++ test executable) +# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] 
+ifneq ($(GPUCC),) +check: runTest cmpFcheck cmpFGcheck +else +check: runTest cmpFcheck +endif + +# Target: runTest (run the C++ test executable runTest.exe) +runTest: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/runTest.exe + +# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) +runCheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 + +# Target: runGcheck (run the CUDA standalone executable gcheck.exe, with a small number of events) +runGcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 + +# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) +runFcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 + +# Target: runFGcheck (run the Fortran standalone executable - with CUDA MEs - fgcheck.exe, with a small number of events) +runFGcheck: all.$(TAG) + $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 + +# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) +cmpFcheck: all.$(TAG) + @echo + @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
 Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi
+
+# Target: cmpFGcheck (compare ME results from the CUDA and Fortran with CUDA MEs standalone executables, with a small number of events)
+cmpFGcheck: all.$(TAG)
+	@echo
+	@echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2"
+	@echo "$(BUILDDIR)/fgcheck.exe 2 32 2"
+	@me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/CUDA) = $${me1}"; echo "Avg ME (F77/CUDA) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/CUDA) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/CUDA) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi
+
+# Target: memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck)
+memcheck: all.$(TAG)
+	$(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2
+
+#-------------------------------------------------------------------------------
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk similarity index 98% rename from epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk rename to epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk index efe82df88d..2c5f8509bb 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk @@ -571,21 +571,24 @@ override RUNTIME = cxx_main=$(BUILDDIR)/check.exe fcxx_main=$(BUILDDIR)/fcheck.exe +cxx_rwgtlib=$(BUILDDIR)/librwgt.a ifneq ($(GPUCC),) cu_main=$(BUILDDIR)/gcheck.exe fcu_main=$(BUILDDIR)/fgcheck.exe +cu_rwgtlib=$(BUILDDIR)/libgrwgt.a else cu_main= fcu_main= +cu_rwgtlib= endif testmain=$(BUILDDIR)/runTest.exe ifneq ($(GTESTLIBS),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) $(testmain) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) $(cu_rwgtlib) $(cxx_rwgtlib) $(testmain) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) $(cu_rwgtlib) $(cxx_rwgtlib) endif # Target (and build options): debug @@ -728,6 +731,11 @@ $(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PAT $(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) 
$(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) +# Target (and build rules): C++ and CUDA rwgt libraries +cxx_rwgtfiles := $(BUILDDIR)/rwgt_runner.o $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(cxx_objects_exe) +$(cxx_rwgtlib): $(cxx_rwgtfiles) + ar rcs $@ $^ + ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) $(cu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') @@ -738,6 +746,9 @@ endif $(cu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cu_main): $(BUILDDIR)/check_sa_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_cu.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(RNDLIBFLAGS) +cu_rwgtfiles := $(BUILDDIR)/grwgt_runner.o $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(cu_objects_exe) +$(cu_rwgtlib): $(cu_rwgtfiles) + ar rcs $@ $^ endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 3811013e24..1b19198434 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -2148,19 +2148,36 @@ def get_rwgt_includes(self): """Return string with the include directives for the REX reweighting""" return "#include \"P%d_%s/rwgt_runner.cc\"" % (self.process_number, self.process_name) + def write_rwgt_header(self): + """Writes a simple rwgt_runner.h file to forward declare the runner object""" + # Adjust the placeholders for use with 
`.format()` + rwgt_h = """#ifndef {namespace}_RWGT_RUNNER_H + #define {namespace}_RWGT_RUNNER_H + #include \"teawREX.hpp\" + #include \"rwgt_instance.h\" + namespace {namespace} {{ + extern rwgt::instance runner; + }} + #endif""".format(namespace=self.get_proc_dir()) + + # Using `with` statement for better file handling + with open(os.path.join(self.path, 'rwgt_runner.h'), 'w') as ff: + ff.write(rwgt_h) + def edit_rwgt_runner(self): """Create the rwgt_runner.cc file for the REX reweighting""" ###misc.sprint('Entering PLUGIN_OneProcessExporterRwgt.edit_rwgt_runner') # Create the rwgt_runner.cc file # replace_dict = {} replace_dict = super().get_process_class_definitions(write=False) - rwgt_runner = self.get_proc_dir() + self.rwgt_template +# rwgt_runner = self.get_proc_dir() + self.rwgt_template replace_dict['process_namespace'] = self.get_proc_dir() replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() replace_dict['init_prt_ids'] = self.get_init_prts_vec(self.matrix_elements[0].get('processes')[0]) replace_dict['fin_prt_ids'] = self.get_fin_prts_vec(self.matrix_elements[0].get('processes')[0]) replace_dict['process_event'] = self.get_rwgt_legs(self.matrix_elements[0].get('processes')[0]) template = open(pjoin(self.template_path,'REX', 'rwgt_runner.inc'),'r').read() + self.write_rwgt_header() ff = open(pjoin(self.path, 'rwgt_runner.cc'),'w') ff.write(template % replace_dict) ff.close() diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index 2d92b35dd0..7a3b83f1d6 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -374,12 +374,15 @@ class RWGT_ProcessExporter(PLUGIN_ProcessExporter): s+'gpu/MadgraphTest.h', s+'gpu/runTest.cc', s+'gpu/testmisc.cc', s+'gpu/testxxx_cc_ref.txt', s+'gpu/perf.py', s+'gpu/profile.sh', - s+'CMake/SubProcesses/CMakeLists.txt'], + 
s+'CMake/SubProcesses/CMakeLists.txt', + s+'gpu/cudacpp_rex_driver.mk', + s+'REX/rwgt_instance.h', s+'REX/REX.hpp', s+'REX/teawREX.hpp'], 'test': [s+'gpu/cudacpp_test.mk']} - from_template['SubProcesses'].append(s+'REX/rwgt_instance.h') - from_template['SubProcesses'].append(s+'REX/REX.hpp') - from_template['SubProcesses'].append(s+'REX/teawREX.hpp') +# from_template['SubProcesses'].append(s+'REX/rwgt_instance.h') +# from_template['SubProcesses'].append(s+'REX/REX.hpp') +# from_template['SubProcesses'].append(s+'REX/teawREX.hpp') +# from_template['SubProcesses'].append(s+'gpu/cudacpp_rex_driver.mk') to_link_in_P = ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', @@ -403,13 +406,14 @@ class RWGT_ProcessExporter(PLUGIN_ProcessExporter): 'testxxx.cc', # this is generated from a template in Subprocesses but we still link it in P1 'MemoryBuffers.h', # this is generated from a template in Subprocesses but we still link it in P1 'MemoryAccessCouplings.h', # this is generated from a template in Subprocesses but we still link it in P1 - 'perf.py', 'profile.sh'] + 'perf.py', 'profile.sh', + 'rwgt_instance.h', 'REX.hpp', 'teawREX.hpp'] - to_link_in_P.append('rwgt_instance.h') - to_link_in_P.append('REX.hpp') - to_link_in_P.append('teawREX.hpp') +# to_link_in_P.append('rwgt_instance.h') +# to_link_in_P.append('REX.hpp') +# to_link_in_P.append('teawREX.hpp') - template_Sub_make = pjoin(PLUGINDIR, 'madgraph', 'iolibs', 'template_files','gpu','cudacpp_rex.mk') + template_Sub_make = pjoin(PLUGINDIR, 'madgraph', 'iolibs', 'template_files','gpu','cudacpp_rex_runner.mk') # def generate_subprocess_directory(self, subproc_group, fortran_model, me=None): # misc.sprint('Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory)') @@ -459,7 +463,7 @@ def export_driver(self): replace_dict['include_lines'] = '' replace_dict['run_set'] = '' for name in self.rwgt_names: - replace_dict['include_lines'] += '#include 
"%s/rwgt_runner.cc"\n' % name + replace_dict['include_lines'] += '#include "%s/rwgt_runner.h"\n' % name replace_dict['run_set'] += '%s::runner,' % name replace_dict['run_set'] = replace_dict['run_set'][:-1] template_path = os.path.join( PLUGINDIR, 'madgraph', 'iolibs', 'template_files' ) diff --git a/tools/REX/rwgt_driver.cc b/tools/REX/rwgt_driver.cc index f4c6ab927f..4fe4023730 100644 --- a/tools/REX/rwgt_driver.cc +++ b/tools/REX/rwgt_driver.cc @@ -91,7 +91,7 @@ int main( int argc, char** argv ){ // ZW : include rwgt_instances(s) std::vector runSet = {%(run_set)s}; - std::vector runSet; +// std::vector runSet; REX::teaw::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); fileCol.initCards(); diff --git a/tools/REX/rwgt_instance.h b/tools/REX/rwgt_instance.h index 374810a1aa..e87219b001 100644 --- a/tools/REX/rwgt_instance.h +++ b/tools/REX/rwgt_instance.h @@ -17,6 +17,8 @@ namespace rwgt{ + using FORTRANFPTYPE = double; + //ZW: Function for calculating the number of remaining events in a warp // in order to pad the input arrays to a multiple of the warp size unsigned int warpRemain( unsigned int nEvt, unsigned int nWarp = 32 ){ diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index 689daabcdd..14d2dfdc79 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -14,10 +14,10 @@ #include "teawREX.hpp" #include "rwgt_instance.h" +#include "fbridge.cc" // ZW: SET UP NAMESPACE namespace %(process_namespace)s{ -#include "fbridge.cc" //namespace dummy{ struct fbridgeRunner{ @@ -111,7 +111,7 @@ namespace %(process_namespace)s{ // ZW: SET UP INPUT LHE BLOCK // ZW: SET UP REX::event FROM LHE BLOCK // auto procEvent = REX::event( procEvent ); - REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; +// REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; std::vector> eventVec = {%(process_event)s}; REX::event locEv = 
REX::event( eventVec ); diff --git a/tools/REX/teawREX.hpp b/tools/REX/teawREX.hpp index 2c3c7ec7d1..971b563f82 100644 --- a/tools/REX/teawREX.hpp +++ b/tools/REX/teawREX.hpp @@ -27,10 +27,6 @@ #include #include "REX.hpp" -#ifndef FORTRANFPTYPE -#define FORTRANFPTYPE double -#endif - namespace REX::teaw { From 8c78800a6cd3dfa849290304a0eed04f51b20003 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Tue, 9 Apr 2024 14:00:08 +0200 Subject: [PATCH 16/76] separated REX into header and implementation, fixed compilation of P-directories into shared libraries that are linked to rwgt driver --- .../template_files/gpu/cudacpp_rex_driver.mk | 17 +- .../template_files/gpu/cudacpp_rex_runner.mk | 12 +- .../CUDACPP_SA_OUTPUT/model_handling.py | 3 +- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 9 +- tools/REX/REX.cc | 1 + tools/REX/REX.h | 830 ++++++++ tools/REX/REX.hpp | 1181 ++++------- tools/REX/rwgt_driver.cc | 13 +- tools/REX/rwgt_instance.cc | 78 + tools/REX/rwgt_instance.h | 53 +- tools/REX/rwgt_runner.cc | 6 +- tools/REX/teawREX.cc | 1 + tools/REX/teawREX.h | 188 ++ tools/REX/teawREX.hpp | 218 +- tools/REX/tester.cpp | 19 +- tools/REX/unweighted_events.lhe | 1870 ----------------- 16 files changed, 1692 insertions(+), 2807 deletions(-) create mode 120000 tools/REX/REX.cc create mode 100644 tools/REX/REX.h create mode 100644 tools/REX/rwgt_instance.cc create mode 120000 tools/REX/teawREX.cc create mode 100644 tools/REX/teawREX.h delete mode 100644 tools/REX/unweighted_events.lhe diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk index 3a8c3e3e98..9889da9575 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk @@ -572,25 +572,24 
@@ override RUNTIME = .PHONY: all $(DIRS) -# Assuming DIRS is defined as before DIRS := $(wildcard P*) + # Construct the library paths -rwgtlib := $(addprefix ,$(addsuffix /librwgt.a,$(DIRS))) +cxx_proclibs := $(shell for dir in $(DIRS); do basename $$dir | awk -F_ '{print "mg5amc_"$$(NF-1)"_"$$NF"_cpp"}'; done) +rwgtlib := $(addprefix ,$(addsuffix /librwgt.so,$(DIRS))) cxx_rwgt=$(BUILDDIR)/rwgt.exe ifneq ($(GPUCC),) cu_rwgt=$(BUILDDIR)/grwgt.exe -grwgtlib := $(addprefix $(DIRS)/,libgrwgt.a) +grwgtlib := $(addprefix $(DIRS)/,libgrwgt.so) +cu_proclibs := $(shell for dir in $(DIRS); do basename $$dir | awk -F_ '{print "mg5amc_"$$(NF-1)"_"$$NF"_cuda"}'; done) else cu_rwgt= grwgtlib= +cu_proclibs= endif -ifneq ($(GTESTLIBS),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_rwgt) $(cxx_rwgt) -else all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_rwgt) $(cxx_rwgt) -endif # Target (and build options): debug MAKEDEBUG= @@ -735,7 +734,7 @@ $(rwgtlib): # Target (and build rules): C++ and CUDA standalone executables $(cxx_rwgt): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cxx_rwgt): $(BUILDDIR)/rwgt_driver.o $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(rwgtlib) - $(CXX) -o $@ $(BUILDDIR)/rwgt_driver.o $(rwgtlib) $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) + $(CXX) -o $@ $(BUILDDIR)/rwgt_driver.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -l$(cxx_proclibs) $(rwgtlib) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) @@ -746,7 +745,7 @@ $(cu_rwgt): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) endif $(cu_rwgt): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cu_rwgt): rwgtlibs $(BUILDDIR)/grwgt.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(DIRS) - $(GPUCC) -o $@ $(BUILDDIR)/grwgt.o $(grwgtlib) $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) + $(GPUCC) -o $@ $(BUILDDIR)/grwgt.o 
$(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) -l$(cu_proclibs) $(grwgtlib) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk index 2c5f8509bb..80fbe5b8e7 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk @@ -571,12 +571,12 @@ override RUNTIME = cxx_main=$(BUILDDIR)/check.exe fcxx_main=$(BUILDDIR)/fcheck.exe -cxx_rwgtlib=$(BUILDDIR)/librwgt.a +cxx_rwgtlib=$(BUILDDIR)/librwgt.so ifneq ($(GPUCC),) cu_main=$(BUILDDIR)/gcheck.exe fcu_main=$(BUILDDIR)/fgcheck.exe -cu_rwgtlib=$(BUILDDIR)/libgrwgt.a +cu_rwgtlib=$(BUILDDIR)/libgrwgt.so else cu_main= fcu_main= @@ -733,8 +733,8 @@ $(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objec # Target (and build rules): C++ and CUDA rwgt libraries cxx_rwgtfiles := $(BUILDDIR)/rwgt_runner.o $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(cxx_objects_exe) -$(cxx_rwgtlib): $(cxx_rwgtfiles) - ar rcs $@ $^ +$(cxx_rwgtlib): $(cxx_rwgtfiles) $(cxx_objects_lib) + $(CXX) -shared -o $@ $(cxx_rwgtfiles) $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) @@ -747,8 +747,8 @@ $(cu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cu_main): $(BUILDDIR)/check_sa_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_cu.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) 
$(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(RNDLIBFLAGS) cu_rwgtfiles := $(BUILDDIR)/grwgt_runner.o $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(cu_objects_exe) -$(cu_rwgtlib): $(cu_rwgtfiles) - ar rcs $@ $^ +$(cu_rwgtlib): $(cu_rwgtfiles) $(cu_objects_lib) + $(GPUCC) -shared -o $@ $(cu_objects_lib) $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 1b19198434..44b054d9b6 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -2110,7 +2110,7 @@ class PLUGIN_OneProcessExporterRwgt(PLUGIN_OneProcessExporter): # ZW - rwgt functions def get_rwgt_legs(self, process): """Return string with particle ids and status in the REX std::pair format""" - return ",".join(["{%i,%i}" % (leg.get('state'), leg.get('id')) \ + return ",".join(["{\"%i\",\"%i\"}" % (leg.get('state'), leg.get('id')) \ for leg in process.get('legs')]).replace('0', '-1') def get_init_prts_vec(self, process): @@ -2153,7 +2153,6 @@ def write_rwgt_header(self): # Adjust the placeholders for use with `.format()` rwgt_h = """#ifndef {namespace}_RWGT_RUNNER_H #define {namespace}_RWGT_RUNNER_H - #include \"teawREX.hpp\" #include \"rwgt_instance.h\" namespace {namespace} {{ extern rwgt::instance runner; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index 7a3b83f1d6..cce954413b 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -354,7 +354,10 @@ class RWGT_ProcessExporter(PLUGIN_ProcessExporter): 'CMake': 
[s+'CMake/Compilers.txt', s+'CMake/Platforms.txt', s+'CMake/Macros.txt'], 'src': [s+'gpu/rambo.h', s+'read_slha.h', s+'read_slha.cc', s+'gpu/mgOnGpuFptypes.h', s+'gpu/mgOnGpuCxtypes.h', s+'gpu/mgOnGpuVectors.h', - s+'CMake/src/CMakeLists.txt' ], + s+'CMake/src/CMakeLists.txt', + s+'REX/REX.cc', s+'REX/teawREX.cc', + s+'REX/REX.h', s+'REX/teawREX.h', + s+'REX/rwgt_instance.h', s+'REX/rwgt_instance.cc'], 'SubProcesses': [s+'gpu/nvtx.h', s+'gpu/timer.h', s+'gpu/timermap.h', s+'gpu/ompnumthreads.h', s+'gpu/GpuRuntime.h', s+'gpu/GpuAbstraction.h', s+'gpu/MemoryAccessHelpers.h', s+'gpu/MemoryAccessVectors.h', @@ -376,7 +379,7 @@ class RWGT_ProcessExporter(PLUGIN_ProcessExporter): s+'gpu/perf.py', s+'gpu/profile.sh', s+'CMake/SubProcesses/CMakeLists.txt', s+'gpu/cudacpp_rex_driver.mk', - s+'REX/rwgt_instance.h', s+'REX/REX.hpp', s+'REX/teawREX.hpp'], + s+'REX/rwgt_instance.h', s+'REX/REX.h', s+'REX/teawREX.h'], 'test': [s+'gpu/cudacpp_test.mk']} # from_template['SubProcesses'].append(s+'REX/rwgt_instance.h') @@ -407,7 +410,7 @@ class RWGT_ProcessExporter(PLUGIN_ProcessExporter): 'MemoryBuffers.h', # this is generated from a template in Subprocesses but we still link it in P1 'MemoryAccessCouplings.h', # this is generated from a template in Subprocesses but we still link it in P1 'perf.py', 'profile.sh', - 'rwgt_instance.h', 'REX.hpp', 'teawREX.hpp'] + 'rwgt_instance.h', 'REX.h', 'teawREX.h'] # to_link_in_P.append('rwgt_instance.h') # to_link_in_P.append('REX.hpp') diff --git a/tools/REX/REX.cc b/tools/REX/REX.cc new file mode 120000 index 0000000000..65f267f382 --- /dev/null +++ b/tools/REX/REX.cc @@ -0,0 +1 @@ +REX.hpp \ No newline at end of file diff --git a/tools/REX/REX.h b/tools/REX/REX.h new file mode 100644 index 0000000000..2dead7d333 --- /dev/null +++ b/tools/REX/REX.h @@ -0,0 +1,830 @@ +/*** + * ______ _______ __ + * | ___ \ ___\ \ / / + * | |_/ / |__ \ V / + * | /| __| / \ + * | |\ \| |___/ /^\ \ + * \_| \_\____/\/ \/ + * + ***/ + +// THIS IS NOT A LICENSED 
RELEASE +// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD +// FROM AN IMPROPER RELEASE. + +// Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. +// All rights reserved. + +#ifndef _REX_H_ +#define _REX_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// ZW: all fcns within the REX standard sit in the +// namespace REX +// Note that as a convention, std::string_view objects will be +// referred to as strings unless the difference is relevant +namespace REX +{ + #pragma warning( push ) + #pragma warning( disable : 4101) + static const size_t npos = -1; + #pragma warning( pop ) + + using sortFcn = std::function>(std::vector)>; + using statSort = std::function>(std::string_view, std::vector)>; + + template + std::shared_ptr> stoiSort(const std::vector &vector); + extern template std::shared_ptr> stoiSort(const std::vector &vector); + + template + std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort); + extern template std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort); + + std::shared_ptr> nuWordSplitter( std::string_view line ); + + struct xmlTree; + +// ZW: struct for handling tags in XML node opening tags + struct xmlTag { + public: + void setVal( std::string_view valSet ); + void setId( std::string_view idSet ); + std::string_view getVal(); + std::string_view getId(); + bool isModded(); + xmlTag(); + xmlTag( xmlTag& oldTag ); + xmlTag( std::string_view initId, std::string_view initVal); + protected: + bool modded; + std::string_view val; + std::string_view id; + }; + + struct xmlTree{ + public: + xmlTree(); + xmlTree( std::string_view file ); + xmlTree( std::string_view file, size_t& strt, size_t& nd ); + auto& getChildren(){ return children; } + std::string_view& getOrigin(){ return origin; } + size_t getStart(){ return start; } + size_t getEnd(){ return end; } + 
size_t getContStart(){ return contSt; } + size_t getContEnd(){ return contEnd; } + bool isFaux(){ return faux; } + bool isInit(){ return initialised; } + bool hasChildren(){ return children->size() > 0; } + protected: + std::shared_ptr>> children; // vector of pointers to children nodes + std::string_view origin; + size_t start; // position of opening bracket of node opening + size_t end; // position of final character of ending node, including trailing blankspace + size_t contSt; + size_t contEnd; + bool faux = false; // boolean showing whether this item is a true node or content squeezed between nodes + bool initialised; + }; + + struct xmlNode { + public: + xmlNode(); + xmlNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ); + xmlNode( xmlTree &tree ); + std::vector> getChildren(); + std::vector> getTags(); + std::string_view getFile(); + std::string_view getName(); + std::string_view getContent(); + size_t getStart(); + size_t getEnd(); + xmlTree getTree(); + virtual bool isModded(); + virtual bool isModded( bool deep ); + bool isWritten(); + bool isParsed(); + bool isFaux(); + bool hasChildren(); + void setModded( bool mod ); + bool deepModded(); + bool deepParse(); + void parser( bool recursive ); + void addChild( std::shared_ptr child ); + void addTag( std::shared_ptr tag ); + void setFile( std::string_view file ); + void setName( std::string_view newName ); + void setCont( std::string_view cont ); + protected: + virtual bool parse(); + virtual bool parse( bool recurs ); + bool parseTop(); + virtual bool parseContent(); + bool parseChildren( bool recursive ); + std::string nodeHeader; + std::string nodeContent; + std::string nodeEnd; + xmlTree structure; + std::vector> children; + std::vector> tags; + std::shared_ptr writtenSelf; + bool deepMod = false; + std::string_view xmlFile; + std::string_view name; + std::string_view content; + size_t start; + size_t end = npos; + bool modded = false; + bool written = 
false; + bool parsed = false; + bool deepParsed = false; + bool faux = false; + virtual void headWriter(); + virtual void endWriter(); + virtual void contWriter(); + virtual void childWriter(); + virtual void endFinder(); + virtual void fullWriter(); + public: + virtual int childCounter(); + virtual void childCounter( int& noChilds ); + virtual std::shared_ptr nodeWriter(); + }; + + struct lhePrt{ + public: + std::string_view getLine(); + std::string_view getComment(); + std::vector getMom(); + std::string_view getE(); + std::string_view getMass(); + std::string_view getVTim(); + std::string_view getSpin(); + std::string_view getPDG(); + std::string_view getStatus(); + std::vector getMothers(); + std::vector getColor(); + void setComment( std::string_view nuCom ); + void setMom( std::vector nuMom ); + void setEnergy( std::string_view nuE ); + void setMass( std::string_view nuM ); + void setVTim( std::string_view nuVTim ); + void setSpin( std::string_view nuSpin ); + void setPDG( std::string_view nuPDG ); + void setStatus( std::string_view nuSt ); + void setMothers( std::vector nuMum ); + void setColors( std::vector nuCol ); + bool isModded(); + bool isWritten(); + std::shared_ptr getContent(); + lhePrt(); + lhePrt( std::pair& prtInfo ); + lhePrt( std::pair& prtInfo ); + lhePrt( const std::string_view originFile, const size_t& beginLine = 0, const size_t& endLine = npos ); + protected: + std::shared_ptr content; + std::string_view sourceFile; + std::string_view comment; + std::string_view mom[3]; + std::string_view energy; + std::string_view mass; + std::string_view vtim; + std::string_view spin; + std::string_view pdg; + std::string_view status; + std::string_view mothers[2]; + std::string_view icol[2]; + bool modded = false; + bool written = false; + void writer(); + }; + + struct evHead { + public: + std::string_view getComment(); + std::string_view getWeight(); + std::string_view getScale(); + std::string_view getAQED(); + std::string_view getAQCD(); + 
std::string_view getNprt(); + std::string_view getProcID(); + bool isModded(); + bool isWritten(); + void setComment( std::string_view nuCom ); + void setWeight( std::string_view nuWgt ); + void setScale( std::string_view nuScale ); + void setAQED( std::string_view nuAQED ); + void setAQCD( std::string_view nuAQCD ); + void setNprt( std::string_view nuNprt ); + void setNprt( int nuNprt ); + void setProcID( std::string_view nuProcID ); + std::shared_ptr getContent(); + evHead(); + evHead( const std::string_view originFile, size_t beginLine = 0, size_t endLine = npos ); + protected: + std::shared_ptr content; + std::string_view sourceFile; + std::string_view comment; + std::string_view weight; + std::string_view scale; + std::string_view aqed; + std::string_view aqcd; + std::string_view nprt; + int nprtint; + std::string nprtstr; + std::string_view procid; + bool modded = false; + bool written = false; + void writer(); + }; + + struct bodyWgt : public xmlNode { + public: + void setComment( std::string_view nuComment ); + void setVal( std::string nuVal ); + void setVal( std::string_view nuVal ); + void setVal( double nuVal ); + void setId( std::string nuId ); + void setModded( bool nuModded ); + std::string_view getComment(); + std::string_view getValS(); + double getValD(); + bodyWgt(); + bodyWgt( std::string_view value ); + bodyWgt( double value ); + bodyWgt( std::string_view value, xmlTag rwgtId ); + bodyWgt( double value, xmlTag rwgtId ); + bodyWgt( std::string_view value, std::shared_ptr rwgtId ); + bodyWgt( double value, std::shared_ptr rwgtId ); + bodyWgt( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ); + bodyWgt( xmlNode& wgtNode ); + bodyWgt( xmlNode* wgtNode ); + bodyWgt( std::shared_ptr wgtNode ); + bodyWgt( xmlTree& wgtTree ); + bodyWgt( xmlTree* wgtTree ); + bodyWgt( std::shared_ptr wgtTree ); + bodyWgt( double value, std::string& idTag ); + void appendWgt( std::shared_ptr document ); + void appendWgt( 
std::string* document ); + std::shared_ptr appendWgt( std::string_view document ); + protected: + std::string_view comment; + std::string valS; + std::string id; + double valD; + void fullWriter() override; + }; + + struct event : public xmlNode { + public: + evHead getHead(); + std::vector> getPrts(); + std::vector> getWgts(); + void setHead( evHead head ); + void addPrt( std::shared_ptr prtcl ); + void addPrt( lhePrt prtcl ); + void setPrts( std::vector> prtcls ); + void addWgt( bodyWgt nuWgt ); + void addWgt( std::shared_ptr nuWgt ); + void addWgt( bodyWgt nuWgt, std::string& id ); + void addWgt( std::shared_ptr nuWgt, std::string& id ); + bool newWeight(); + int getNprt(); + bool isModded() override; + bool isModded( bool deep ) override ; + event(); + event( std::vector>& prtInfo ); + event( std::vector>& prtInfo ); + event( std::vector> prtInfo ); + event( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) ; + event( const xmlNode& originFile ); + event( const xmlNode* originFile ); + event( const std::shared_ptr& originFile ); + event( xmlTree& originFile ); + event( xmlTree* originFile ); + event( std::shared_ptr originFile ); + bool prtsAreMod(); + bool headIsMod(); + bool isSpecSort() const; + sortFcn getSortFcn() const; + statSort getStatSort() const; + protected: + std::vector> rwgt; + std::shared_ptr childRwgt; + bool hasRwgt(); + bool rwgtChild(); + bool bothRwgt(); + bool eitherRwgt(); + evHead header; + bool hasBeenProc = false; + std::vector> prts; + std::map> procMap; + std::map> procOrder; + sortFcn eventSort = []( std::vector vec ){ return stoiSort( vec ); }; + statSort specSort = []( std::string_view stat, std::vector vec ){ return stoiSort( vec ); }; + bool specSorted = false; + bool initProcMap(bool hard = false); + bool initProcMap( sortFcn sorter, bool hard = false ); + bool initProcMap( statSort sorter, bool hard = false ); + bool inRwgtChild( std::string_view name ); + bool checkRwgtOverlap(); + 
void childRwgtWriter(); + void vecRwgtWriter( bool midNode = false ); + void rwgtWriter(); + void contWriter() override; + void childWriter() override; + bool addedWgt = false; + void fullWriter() override; + void fullWriter( bool deep ); + void appendWgts(); + public: + std::shared_ptr nodeWriter() override; + std::shared_ptr nodeWriter( bool recursive ); + std::map> &getProc(); + std::map> &getProcOrder(); + std::map> getProc() const; + std::map> getProcOrder() const; + std::map> &getProc(sortFcn sorter); + std::map> &getProcOrder(sortFcn sorter); + std::map> &getProc(statSort sorter); + std::map> &getProcOrder(statSort sorter); + }; + + struct paramVal{ + public: + double value = 0; + int id = 0; + std::string_view realLine; + std::string_view comment; + std::string_view idStr; + std::string_view valStr; + virtual void parse(); + paramVal(); + paramVal( std::string_view paramLine, bool parseOnline = false ); + bool isMod(); + bool modded = false; + virtual std::shared_ptr selfWrite(); + }; + + struct paramBlock { + public: + std::string_view realBlock; + size_t startPt; + std::string_view comment; + std::string_view initComm; + std::string_view name; + std::vector params; + virtual void parse( bool parseOnline = false ); + paramBlock(); + paramBlock( std::string_view paramSet, bool parseOnline = false ); + bool isMod(); + bool modded = false; + virtual std::shared_ptr selfWrite(); + }; + + struct decVal : public paramVal{ + public: + void parse() override; + decVal( std::string_view paramLine = "", bool parseOnline = false ); + std::shared_ptr selfWrite() override; + }; + + struct decBlock : public paramBlock { + public: + std::vector decays; + void parse( bool parseOnline = false ) override; + void parse( std::shared_ptr> decLines, bool parseOnline = false ); + decBlock( std::string_view paramSet = "", bool parseOnline = false ); + std::shared_ptr selfWrite() override; + }; + + bool clStringComp( std::string_view str1, std::string str2 ); + + struct 
lesHouchesCard { + public: + decBlock decays; + std::string_view xmlFile; + size_t start; + size_t end; + bool modded; + bool parsed; + std::string_view header; + std::vector blocks; + size_t blockStart; + std::function lambda = [&]( size_t& conPt, const std::string_view& file ) + { return !( file[conPt+1] == ' ' || file[conPt+1] == '#' || file[conPt+1] == '\n' ); }; + std::function lambdaNu = [&]( size_t& conPt, const std::string_view& file ) + { return !( file[conPt+1] == ' ' || file[conPt+1] == '\n' || file[conPt+1] == '<'); }; + std::function lambdaD = [&]( size_t& conPt, const std::string_view& file ) + { return !( clStringComp(file.substr(conPt+1, 1), std::string("d") ) ); }; + void parse( bool parseOnline = false ); + lesHouchesCard( const std::string_view originFile = "", const size_t& begin = 0, bool parseOnline = false ); + bool isMod(); + std::shared_ptr selfWrite(); + }; + + + struct headWeight : public xmlNode { + public: + int getId(); + std::string_view getTag(); + bool hasTag(); + headWeight(); + headWeight( std::string_view paramSet, const size_t& begin = 0 ); + headWeight( std::string_view paramSet, std::string_view idText, int idNo, const size_t& begin = 0 ); + headWeight( xmlNode& node ); + headWeight( xmlNode* node ); + headWeight( std::shared_ptr node ); + headWeight( xmlTree& tree ); + headWeight( xmlTree* tree ); + headWeight( std::shared_ptr tree ); + headWeight( std::string_view paramSet, std::string& idText, unsigned int idNo, const size_t& begin = 0 ); + headWeight( std::string_view paramSet, std::string& idText); + void setId( std::string identity ); + protected: + std::string idTag; + long unsigned int id = npos; + void headWriter() override; + void headWriter( bool incId ); + void endWriter() override; + void contWriter() override; + void childWriter() override; + void childWriter( bool hasChildren ); + void fullWriter() override; + void fullWriter( bool incId, bool hasChildren=true ); + }; + + + // ZW: struct for handling rwgt groups 
+ // in the LHE header initrwgt node + struct weightGroup : public xmlNode { + public: + bool getIncId(); + void setIncId( bool nuIncId ); + std::vector> getWgts(); + void addWgt( headWeight nuWgt ); + void addWgt( std::shared_ptr nuWgt ); + weightGroup(); + weightGroup( std::vector> nuWgts ); + weightGroup( std::vector nuWgts ); + weightGroup( xmlNode& wgtNode ); + weightGroup( xmlNode* wgtNode ); + weightGroup( xmlTree& wgtTree ); + weightGroup( xmlTree* wgtTree ); + weightGroup( std::shared_ptr wgtTree ); + weightGroup( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ); + protected: + std::string_view rwgtName; + std::string_view wgtNamStrat; + bool includeId = false; + std::vector> paramSets; + bool nu; + std::string_view idTag; + int id; + void headWriter() override; + void contWriter() override; + void childWriter() override; + void childWriter( bool hasChildren ); + void endWriter() override; + }; + + + struct initRwgt : public xmlNode { + public: + std::vector> getGroups(); + size_t noGrps(); + void addGroup( weightGroup nuGroup ); + void addGroup( std::shared_ptr nuGroup ); + void addWgt( unsigned int index, std::shared_ptr nuWgt ); + void addWgt( unsigned int index, headWeight nuWgt ); + initRwgt(); + initRwgt( std::vector> nuGroups ); + initRwgt( xmlNode& wgtNode ); + initRwgt( xmlNode* wgtNode ); + initRwgt( std::shared_ptr wgtNode ); + initRwgt( xmlTree& wgtTree ); + protected: + bool grpIsInit = false; + bool grpInit( std::shared_ptr& wgt ); + std::vector> groups; + void contWriter() override; + void childWriter() override; + void childWriter( bool hasChildren ); + }; + + struct lheInitHead{ + public: + std::string_view idbmup[2]; + std::string_view ebmup[2]; + std::string_view pdfgup[2]; + std::string_view pdfsup[2]; + std::string_view idwtup; + std::string_view nprup; + bool isWritten(); + bool isModded(); + std::shared_ptr getContent(); + lheInitHead( std::string_view initHead ); + lheInitHead( xmlNode& 
initNode ); + protected: + std::shared_ptr content; + bool written = false; + bool modded = false; + void writer(); + }; + + struct lheInitLine { + public: + std::string_view xsecup; + std::string_view xerrup; + std::string_view xmaxup; + std::string_view lprup; + bool isWritten(); + bool isModded(); + std::shared_ptr getContent(); + lheInitLine(); + lheInitLine( std::string_view procLine ); + protected: + std::shared_ptr content; + bool written = false; + bool modded = false; + void writer(); + }; + + + struct slhaNode : public xmlNode { + public: + std::shared_ptr getParameters(); + slhaNode(); + slhaNode( lesHouchesCard parameters ); + slhaNode( std::shared_ptr parameters ); + slhaNode( xmlNode& node, bool parseOnline = false ); + slhaNode( xmlNode* node, bool parseOnline = false ); + slhaNode( std::shared_ptr node, bool parseOnline = false ); + slhaNode( xmlTree tree, bool parseOnline = false ); + slhaNode( std::shared_ptr tree, bool parseOnline = false ); + slhaNode( xmlTree* tree, bool parseOnline = false ); + slhaNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ); + protected: + std::shared_ptr parameterCard; + bool pCardInit = false; + void headWriter() override; + void endWriter() override; + void contWriter() override; + }; + + struct initNode : public xmlNode { + public: + std::shared_ptr getHead(); + std::vector> getLines(); + void setHead( std::shared_ptr head ); + void setLines( std::vector> lines ); + void addLine( std::shared_ptr line ); + initNode(); + initNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ); + initNode( xmlNode& node, bool parseOnline = false ); + initNode( xmlNode* node, bool parseOnline = false ); + initNode( std::shared_ptr node, bool parseOnline = false ); + initNode( xmlTree tree, bool parseOnline = false ); + initNode( std::shared_ptr tree, bool parseOnline = false ); + initNode( xmlTree* tree, bool parseOnline = false ); + protected: + 
std::shared_ptr initHead; + std::vector> initLines; + bool parseContent() override; + void contWriter() override; + }; + + struct lheHead : public xmlNode { + public: + size_t addWgtGroup( std::shared_ptr& wgtGroup ); + size_t addWgtGroup( weightGroup wgtGroup ); + void addWgt( size_t index, std::shared_ptr nuWgt ); + void addWgt( size_t index, headWeight nuWgt ); + void addWgt( size_t index, std::shared_ptr nuWgt, std::string idTagg ); + void addWgt( size_t index, headWeight nuWgt, std::string idTagg ); + void setInitRwgt( initRwgt initWgt ); + void setInitRwgt( std::shared_ptr initWgt ); + std::vector> getWgtGroups(); + std::shared_ptr getInitRwgt(); + std::shared_ptr getParameters(); + void setParameters( std::shared_ptr params ); + bool rwgtInc(); + lheHead(); + lheHead( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ); + lheHead( xmlNode& node ); + lheHead( xmlNode* node ); + lheHead( std::shared_ptr node ); + lheHead( xmlTree tree ); + lheHead( std::shared_ptr tree ); + lheHead( xmlTree* tree ); + protected: + bool wgtGrpIsInit = false; + bool wgtGrpInit( std::shared_ptr& wgtGrp ); + std::shared_ptr parameters; + bool hasRwgt = false; + std::shared_ptr rwgtNodes; + std::vector> initrwgt; + bool relChildSet = false; + std::vector relChild; + void setRelChild(); + bool parseChildren( bool recursive ); + void headWriter() override; + void childWriter() override; + void fullWriter() override; + }; + + struct newWgt{ + protected: + std::shared_ptr headWgt; + std::vector> bodyWgts; + public: + newWgt( std::shared_ptr heaWgt, std::vector> bodWgts ); + newWgt( std::shared_ptr heaWgt, std::shared_ptr> wgts ); + newWgt( std::string_view parameters, std::shared_ptr> wgts, std::string idTag = "rex_rwgt" ); + newWgt( std::string_view parameters, int idNum, std::shared_ptr> wgts, std::string idTag = "rex_rwgt" ); + newWgt( std::string& parameters ); + newWgt( std::string& parameters, std::string& idTag ); + std::shared_ptr 
getHeadWgt(); + std::vector> getBodyWgts(); + void addBdyWgts( std::shared_ptr> wgts ); + }; + + + struct lheNode : public xmlNode { + public: + lheNode(); + lheNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ); + std::shared_ptr getHeader(); + std::shared_ptr getInit(); + std::vector> getEvents(); + bool isModded() override; + bool isModded( bool deep ) override; + void setInit( std::shared_ptr initNod ); + void setHeader( std::shared_ptr headNod ); + void addWgt( size_t index, newWgt& addedWgt ); + void addWgt( size_t index, newWgt& addedWgt, std::string& idTag ); + void setRelStats( std::vector& particles ); + std::vector& getRelStats(); + void setSameSort( sortFcn& sortF ); + sortFcn& getSameSort(); + void setStatSort( statSort& statS ); + statSort& getStatSort(); + protected: + std::vector> events = {}; + std::shared_ptr header = std::make_shared(xmlFile, start); + std::shared_ptr init = std::make_shared(xmlFile, start); + std::vector relStat = {"-1", "1"}; + sortFcn particleSort = []( std::vector prts ){ return stoiSort(prts); }; + statSort statParticleSort = []( std::string_view dummy, std::vector prts ){ return stoiSort(prts); }; + virtual void headerWriter(); + virtual void initWriter(); + virtual void eventWriter(); + void contWriter() override; + void fullWriter() override; + public: + virtual std::shared_ptr nodeWriter(); + }; + + struct evtInfo { + public: + std::vector wgts; + std::vector scales; + std::vector aQEDs; + std::vector aQCDs; + std::vector nprts; + std::vector relNPrts; + std::vector procIDs; + evtInfo( const std::vector>& lheFile = {} ); + evtInfo( const std::vector>& lheFile, const std::vector& statVec ); + evtInfo( const std::vector>& lheFile, const std::vector& statVec, + sortFcn sorter ); + evtInfo( const std::vector>& lheFile, const std::vector& statVec, + statSort sorter ); + }; + + struct prtInfo { + public: + std::vector moms; + std::vector masses; + std::vector vtims; + 
std::vector spins; + std::vector statuses; + std::vector mothers; + std::vector icols; + std::vector pdgs; + prtInfo( const std::vector>& lheFile = {}, const int nPrt = 8 ); + prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ); + prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + sortFcn sorter ); + prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + statSort sorter ); + }; + + struct transMonoLHE { + public: + evtInfo evtsHead; + prtInfo evtsData; + std::shared_ptr process; + transMonoLHE( const std::vector>& lheFile = {}, const int nPrt = 8 ); + transMonoLHE( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ); + transMonoLHE( const std::vector>& lheFile, const int nPrt, + sortFcn sorter, + std::vector statVec = { "-1", "1" } ); + transMonoLHE( const std::vector>& lheFile, const int nPrt, + statSort sorter, + std::vector statVec = { "-1", "1" } ); + }; + + struct transLHE { + public: + std::string_view xmlFile; + std::vector> subProcs; + std::vector> procSets; + std::vector>> relProcs; + transLHE(); + transLHE( lheNode& lheFile ); + transLHE( lheNode& lheFile, + sortFcn sorter, + const std::vector& statVec = { "-1", "1" } ); + transLHE( lheNode& lheFile, + statSort sorter, + const std::vector& statVec = { "-1", "1" } ); + transLHE( lheNode& lheFile, const std::vector& statVec ); + std::shared_ptr> vectorFlat( std::vector>> vecVec ); + }; + + struct lheRetDs{ + public: + bool ebmup = false; + bool xsecup = false; + bool xerrup = false; + bool xmaxup = false; + bool xwgtup = false; + bool scalup = false; + bool aqedup = false; + bool aqcdup = false; + bool pup = true; + bool mass = false; + bool vtimup = false; + bool spinup = false; + std::vector getBools(); + }; + + // ZW: bool struct to define which int values + // to extract transposed from LHE file + struct lheRetInts{ + public: + //bool maxpup = false; + bool idbmup = false; + bool pdfgup = 
false; + bool pdfsup = false; + bool idwtup = false; + bool nprup = false; + bool lprup = false; + //bool maxnup = false; + bool nup = true; + bool idprup = false; + bool idup = true; + bool istup = true; + bool mothup = false; + bool icolup = false; + std::vector getBools(); + }; + + struct eventComp{ + bool operator()( event& firstEv, event& secEv); + bool operator()( const event& firstEv, const event& secEv) const; + bool operator()(event& firstEv, event& secEv, std::vector statVec); + }; + + +std::shared_ptr>>> lheValDoubles( lheNode& lheFile, lheRetDs vals = lheRetDs() ); + +std::shared_ptr>>> lheValDoubles(transLHE& lheAOS, lheRetDs vals = lheRetDs() ); + +// struct lhePrt; +// struct xmlNode; +// struct event : public xmlNode; +// event& makeEv( std::vector>& particles ); +// std::vector> getParticles( event& ev ); +// struct eventComp; +} + +#endif diff --git a/tools/REX/REX.hpp b/tools/REX/REX.hpp index bbf7596c2f..706c65cca4 100644 --- a/tools/REX/REX.hpp +++ b/tools/REX/REX.hpp @@ -15,8 +15,8 @@ // Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. // All rights reserved. 
-#ifndef _REX_HPP_ -#define _REX_HPP_ +#ifndef _REX_CC_ +#define _REX_CC_ #include #include @@ -35,6 +35,8 @@ #include #include #include +#include "REX.h" +#include // ZW: all fcns within the REX standard sit in the // namespace REX @@ -42,10 +44,6 @@ // referred to as strings unless the difference is relevant namespace REX { - #pragma warning( push ) - #pragma warning( disable : 4101) - static const size_t npos = -1; - #pragma warning( pop ) using sortFcn = std::function>(std::vector)>; using statSort = std::function>(std::string_view, std::vector)>; @@ -67,9 +65,11 @@ namespace REX template std::shared_ptr> stoiSort(const std::vector &vector) { - std::function stoicomp = [](const T& i, const T& j) { return std::stoi(std::string(i)) < std::stoi(std::string(j)); }; + std::function stoicomp = [](const T& i, const T& j) { + return std::stoi(std::string(i)) < std::stoi(std::string(j)); }; return indSort(vector, stoicomp); } + template std::shared_ptr> stoiSort(const std::vector &vector); // ZW: wrapper for indSort for comparing string-type arguments representing doubles template @@ -92,22 +92,24 @@ namespace REX indexMap[reference[i]].push(i); } - std::shared_ptr> order; - order->reserve(to_sort.size()); // Pre-allocate memory - + auto order = std::make_shared>(std::vector(to_sort.size(), npos)); + //order->reserve(to_sort.size()); // Pre-allocate memory + size_t pos = 0; for (const auto& elem : to_sort) { auto it = indexMap.find(elem); if (it != indexMap.end() && !it->second.empty()) { - order->push_back(it->second.front()); + order->at(pos) = (it->second.front()); it->second.pop(); - } else { + } //else { // Element in vec2 not found in vec1 - order->push_back(npos); - } + // order->at(pos) = npos; + //} + ++pos; } return order; } + template std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort); // ZW: minimal fcn for counting the amount of times // a given search term appears in a string @@ -232,16 +234,32 @@ namespace REX // ZW: 
templated fcn for comparing two // string-like objects, ignoring cases - template - bool clStringComp( const Str1& org, const Str2& comp ){ + bool clStringComp( std::string_view org, std::string comp ){ return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); } - template - bool clStringComp( const Str1Pt& orgStrt, const Str1Pt& orgEnd, const Str2& comp ){ - return std::equal( orgStrt, orgEnd, comp.begin(), comp.end(), + bool clStringComp( std::string_view org, std::string_view comp ){ + return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); } + bool clStringComp( std::string org, std::string_view comp ){ + return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), + []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + } + bool clStringComp( std::string org, std::string comp ){ + return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), + []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + } + // template + // bool clStringComp( const Str1& org, const Str2& comp ){ + // return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), + // []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + // } + // template + // bool clStringComp( const Str1Pt& orgStrt, const Str1Pt& orgEnd, const Str2& comp ){ + // return std::equal( orgStrt, orgEnd, comp.begin(), comp.end(), + // []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + // } // ZW: templated fcn for finding a caseless substring searchTerm in srcFile // On failure to find searchTerm, returns REX::npos @@ -347,25 +365,18 @@ namespace REX } // ZW: struct for handling tags in XML node opening tags - struct xmlTag { - public: - void setVal( std::string_view valSet ){ modded = true; val = valSet; } - void 
setId( std::string_view idSet ){ modded = true; id = idSet; } - std::string_view getVal(){ return val; } - std::string_view getId(){ return id; } - bool isModded(){ return modded; } - xmlTag(){ modded = false; return; } - xmlTag( xmlTag& oldTag ){ + void xmlTag::setVal( std::string_view valSet ){ modded = true; val = valSet; } + void xmlTag::setId( std::string_view idSet ){ modded = true; id = idSet; } + std::string_view xmlTag::getVal(){ return val; } + std::string_view xmlTag::getId(){ return id; } + bool xmlTag::isModded(){ return modded; } + xmlTag::xmlTag(){ modded = false; return; } + xmlTag::xmlTag( xmlTag& oldTag ){ modded = false; val = oldTag.getVal(); id = oldTag.getId(); } - xmlTag( std::string_view initId, std::string_view initVal){ + xmlTag::xmlTag( std::string_view initId, std::string_view initVal){ modded = false; val = initVal; id = initId; } - protected: - bool modded; - std::string_view val; - std::string_view id; - }; // ZW: function for parsing XML opening // tags and returning the next header tag @@ -385,10 +396,8 @@ namespace REX // end of each node s.t. 
the proper node structures can accurately // detail where children begin and end while allowing for personal // content between child nodes - struct xmlTree { - public: - xmlTree(){ return; } - xmlTree( std::string_view file ){ + xmlTree::xmlTree(){ return; } + xmlTree::xmlTree( std::string_view file ){ origin = file; children = std::make_shared>>(); start = file.find_first_not_of(" \n\r\f\t\v"); @@ -428,7 +437,7 @@ namespace REX end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); initialised = true; } - xmlTree( std::string_view file, size_t& strt, size_t& nd ){ + xmlTree::xmlTree( std::string_view file, size_t& strt, size_t& nd ){ origin = file; children = std::make_shared>>(); start = file.find_first_not_of(" \n\r\f\t\v", strt); @@ -475,31 +484,10 @@ namespace REX strt = end; nd = nodeEndFind(file, strt); } - auto& getChildren(){ return children; } - std::string_view& getOrigin(){ return origin; } - size_t getStart(){ return start; } - size_t getEnd(){ return end; } - size_t getContStart(){ return contSt; } - size_t getContEnd(){ return contEnd; } - bool isFaux(){ return faux; } - bool isInit(){ return initialised; } - bool hasChildren(){ return children->size() > 0; } - protected: - std::shared_ptr>> children; // vector of pointers to children nodes - std::string_view origin; - size_t start; // position of opening bracket of node opening - size_t end; // position of final character of ending node, including trailing blankspace - size_t contSt; - size_t contEnd; - bool faux = false; // boolean showing whether this item is a true node or content squeezed between nodes - bool initialised; - }; // ZW: struct for handling nodes in generic XML files - struct xmlNode { - public: - xmlNode(){ modded = false; return; } - xmlNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ){ + xmlNode::xmlNode(){ modded = false; return; } + xmlNode::xmlNode( const std::string_view originFile, const size_t& begin, const std::vector>& 
childs ){ modded = false; xmlFile = originFile; structure = xmlTree( originFile ); @@ -513,7 +501,7 @@ namespace REX children.push_back( std::make_shared( *child ) ); } } - xmlNode( xmlTree &tree ){ + xmlNode::xmlNode( xmlTree &tree ){ modded = false; structure = tree; if( !structure.isInit() ){ return; } @@ -528,43 +516,43 @@ namespace REX children.push_back( std::make_shared( *child ) ); } } - std::vector> getChildren(){ return children; } - std::vector> getTags(){ return tags; } - std::string_view getFile(){ return xmlFile; } - std::string_view getName(){ return name; } - std::string_view getContent(){ return content; } - size_t getStart(){ return start; } - size_t getEnd(){ return end; } - xmlTree getTree(){ return structure; } - virtual bool isModded(){ return modded; } - virtual bool isModded( bool deep ){ + std::vector> xmlNode::getChildren(){ return children; } + std::vector> xmlNode::getTags(){ return tags; } + std::string_view xmlNode::getFile(){ return xmlFile; } + std::string_view xmlNode::getName(){ return name; } + std::string_view xmlNode::getContent(){ return content; } + size_t xmlNode::getStart(){ return start; } + size_t xmlNode::getEnd(){ return end; } + xmlTree xmlNode::getTree(){ return structure; } + bool xmlNode::isModded(){ return modded; } + bool xmlNode::isModded( bool deep ){ bool modStat = isModded(); if( !deep ){ return modStat; } for( auto child : children ){ modStat = (modStat || child->isModded( deep )); } return modStat; } - bool isWritten(){ return written; } - bool isParsed(){ return parsed; } - bool isFaux(){ return faux; } - bool hasChildren(){ return children.size() > 0; } - void setModded( bool mod ){ modded = mod; } - bool deepModded(){ return deepMod; } - bool deepParse(){ return deepParsed; } - void parser( bool recursive ){ + bool xmlNode::isWritten(){ return written; } + bool xmlNode::isParsed(){ return parsed; } + bool xmlNode::isFaux(){ return faux; } + bool xmlNode::hasChildren(){ return children.size() > 0; } + void 
xmlNode::setModded( bool mod ){ modded = mod; } + bool xmlNode::deepModded(){ return deepMod; } + bool xmlNode::deepParse(){ return deepParsed; } + void xmlNode::parser( bool recursive ){ parsed = parse( recursive ); } - void addChild( std::shared_ptr child ){ modded = true; children.push_back(child); } - void addTag( std::shared_ptr tag ){ modded = true; tags.push_back(tag); } - void setFile( std::string_view file ){ modded = true; xmlFile = file; } - void setName( std::string_view newName ){ modded = true; name = newName; } - void setCont( std::string_view cont ){ modded = true; content = cont; } - protected: - virtual bool parse(){ + void xmlNode::addChild( std::shared_ptr child ){ modded = true; children.push_back(child); } + void xmlNode::addTag( std::shared_ptr tag ){ modded = true; tags.push_back(tag); } + void xmlNode::setFile( std::string_view file ){ modded = true; xmlFile = file; } + void xmlNode::setName( std::string_view newName ){ modded = true; name = newName; } + void xmlNode::setCont( std::string_view cont ){ modded = true; content = cont; } + + bool xmlNode::parse(){ auto topStat = parseTop(); auto contStat = parseContent(); return ( topStat && contStat ); } - virtual bool parse( bool recurs ) + bool xmlNode::parse( bool recurs ) { bool parseSt = parse(); if( !recurs ){ return parseSt; } @@ -572,14 +560,14 @@ namespace REX deepMod = true; return (parseSt && childSt ); } - bool parseTop(){ + bool xmlNode::parseTop(){ if( xmlFile == "" ){ return false; } if( isFaux() ){ return true; } size_t eqSgn = xmlFile.find( "=", start ); size_t nodeInitEnd = xmlFile.find( ">", start ); while( eqSgn < nodeInitEnd ){ tags.push_back( xmlTagParser( xmlFile, eqSgn ) ); } return true; } - virtual bool parseContent(){ + bool xmlNode::parseContent(){ if( xmlFile == "" ){ return false; } end = structure.getContEnd(); for( auto branch : *(structure.getChildren()) ){ @@ -587,7 +575,7 @@ namespace REX } return true; } - bool parseChildren( bool recursive ){ + bool 
xmlNode::parseChildren( bool recursive ){ bool status = true; if( recursive ){ for( auto child : children ) @@ -604,25 +592,7 @@ namespace REX } return status; } - std::string nodeHeader; - std::string nodeContent; - std::string nodeEnd; - xmlTree structure; - std::vector> children; - std::vector> tags; - std::shared_ptr writtenSelf; - bool deepMod = false; - std::string_view xmlFile; - std::string_view name; - std::string_view content; - size_t start; - size_t end = npos; - bool modded = false; - bool written = false; - bool parsed = false; - bool deepParsed = false; - bool faux = false; - virtual void headWriter() { + void xmlNode::headWriter() { if( isFaux() ){ return; } nodeHeader = "<" + std::string(name) ; for( auto tag : tags ){ @@ -630,24 +600,24 @@ namespace REX } nodeHeader += ">"; } - virtual void endWriter() { + void xmlNode::endWriter() { if( isFaux() ){ return; } auto endSt = xmlFile.find_last_of("<", end); nodeEnd = xmlFile.substr( endSt, end - endSt ); } - virtual void contWriter() { + void xmlNode::contWriter() { if( hasChildren() ){ nodeContent = std::string(content.substr(0, children[0]->start - 1 )); } else { nodeContent = std::string(content); } } - virtual void childWriter() { + void xmlNode::childWriter() { for(auto child : children){ nodeContent += (*child->nodeWriter()); } } - virtual void endFinder(){ + void xmlNode::endFinder(){ auto headEnd = xmlFile.find(">", start); auto slashPos = xmlFile.find("/", start); if( headEnd > slashPos ){ end = headEnd; } @@ -655,7 +625,7 @@ namespace REX if( end == npos ){ end = xmlFile.size(); return; } end += 2; } - virtual void fullWriter(){ + void xmlNode::fullWriter(){ if( isModded() ){ headWriter(); contWriter(); @@ -669,8 +639,8 @@ namespace REX written = true; } } - public: - virtual void childCounter( int& noChilds ) + + void xmlNode::childCounter( int& noChilds ) { for( auto child : children ) { @@ -679,16 +649,16 @@ namespace REX } noChilds += children.size(); } - virtual int childCounter() { + 
int xmlNode::childCounter() { int noChilds = 0; childCounter( noChilds ); return noChilds; } - virtual std::shared_ptr nodeWriter() { + std::shared_ptr xmlNode::nodeWriter() { if( isModded( true ) || !isWritten() ){ fullWriter(); } return writtenSelf; } - }; + // ZW: function for large scale parsing of XML files // sequentially goes through the document and @@ -715,17 +685,15 @@ namespace REX // ZW: struct for handling rwgt parameter sets // in the LHE header initrwgt node - struct headWeight : public xmlNode { - public: - int getId(){ return id; } - std::string_view getTag(){ return idTag; } - bool hasTag(){ return (idTag.size() > 0); } - headWeight(){ name = "weight"; return; } - headWeight( std::string_view paramSet, const size_t& begin = 0 ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; return; } - headWeight( std::string_view paramSet, std::string_view idText, int idNo, const size_t& begin = 0 ) : xmlNode(){ + int headWeight::headWeight::getId(){ return id; } + std::string_view headWeight::getTag(){ return idTag; } + bool headWeight::hasTag(){ return (idTag.size() > 0); } + headWeight::headWeight(){ name = "weight"; return; } + headWeight::headWeight( std::string_view paramSet, const size_t& begin ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; return; } + headWeight::headWeight( std::string_view paramSet, std::string_view idText, int idNo, const size_t& begin ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; } - headWeight( xmlNode& node ) : xmlNode( node ){ + headWeight::headWeight( xmlNode& node ) : xmlNode( node ){ parser( false ); name = "weight"; for (auto tag : tags ){ @@ -735,7 +703,7 @@ namespace REX } } } - headWeight( xmlNode* node ) : xmlNode( *node ){ + headWeight::headWeight( xmlNode* node ) : xmlNode( *node ){ parser( false ); name = "weight"; for (auto tag : tags ){ @@ -745,7 +713,7 @@ namespace REX } } } - headWeight( std::shared_ptr node ) : 
xmlNode( *node ){ + headWeight::headWeight( std::shared_ptr node ) : xmlNode( *node ){ parser( false ); name = "weight"; for (auto tag : tags ){ @@ -755,7 +723,7 @@ namespace REX } } } - headWeight( xmlTree& tree ) : xmlNode( tree ){ + headWeight::headWeight( xmlTree& tree ) : xmlNode( tree ){ parser( false ); name = "weight"; for (auto tag : tags ){ @@ -765,7 +733,7 @@ namespace REX } } } - headWeight( xmlTree* tree ) : xmlNode( *tree ){ + headWeight::headWeight( xmlTree* tree ) : xmlNode( *tree ){ parser( false ); name = "weight"; for (auto tag : tags ){ @@ -775,7 +743,7 @@ namespace REX } } } - headWeight( std::shared_ptr tree ) : xmlNode( *tree ){ + headWeight::headWeight( std::shared_ptr tree ) : xmlNode( *tree ){ parser( false ); name = "weight"; for (auto tag : tags ){ @@ -785,17 +753,14 @@ namespace REX } } } - headWeight( std::string_view paramSet, std::string& idText, unsigned int idNo, const size_t& begin = 0 ) : xmlNode(){ + headWeight::headWeight( std::string_view paramSet, std::string& idText, unsigned int idNo, const size_t& begin ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; } - headWeight( std::string_view paramSet, std::string& idText){ + headWeight::headWeight( std::string_view paramSet, std::string& idText){ name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; } - void setId( std::string identity ){ modded = true; idTag = identity; } - protected: - std::string idTag; - long unsigned int id = npos; - void headWriter() override{ + void headWeight::setId( std::string identity ){ modded = true; idTag = identity; } + void headWeight::headWriter(){ if( tags.size() == 0 ){ if( idTag == "" ){ nodeHeader = ""; return; } if( id == npos ){ nodeHeader = ""; return; } @@ -808,7 +773,7 @@ namespace REX } nodeHeader += ">"; } - void headWriter( bool incId ){ + void headWeight::headWriter( bool incId ){ if( !incId ){ headWriter(); return; } if( idTag == "" ){ headWriter(); return; } if( id 
== npos ){ nodeHeader = "getName() == "weight" ){ continue; } nodeContent += *(child->nodeWriter()); } } - void childWriter( bool hasChildren ){ + void headWeight::childWriter( bool hasChildren ){ if( hasChildren ){ childWriter(); } } - void fullWriter() override{ + void headWeight::fullWriter(){ if( isModded() || !isWritten() ){ headWriter(); contWriter(); @@ -845,7 +810,7 @@ namespace REX modded = false; } } - void fullWriter( bool incId, bool hasChildren=true ){ + void headWeight::fullWriter( bool incId, bool hasChildren ){ if( isModded() || !isWritten() ){ headWriter( incId ); contWriter(); @@ -856,27 +821,24 @@ namespace REX written = true; } } - }; // ZW: struct for handling rwgt groups // in the LHE header initrwgt node - struct weightGroup : public xmlNode { - public: - bool getIncId(){ return includeId; } - void setIncId( bool nuIncId ){ includeId = nuIncId; } - std::vector> getWgts(){ return paramSets; } - void addWgt( headWeight nuWgt ){ modded = true; paramSets.push_back( std::make_shared( nuWgt ) ); if( nuWgt.hasTag() ){ includeId = true; } } - void addWgt( std::shared_ptr nuWgt ){ modded = true; paramSets.push_back( nuWgt); if( nuWgt->hasTag() ){ includeId = true; }} - weightGroup() : xmlNode(){ name = "weightgroup"; return; } - weightGroup( std::vector> nuWgts ) : xmlNode(){ name = "weightgroup"; paramSets = nuWgts; for( auto wgt : nuWgts ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup( std::vector nuWgts ) : xmlNode(){ + bool weightGroup::getIncId(){ return includeId; } + void weightGroup::setIncId( bool nuIncId ){ includeId = nuIncId; } + std::vector> weightGroup::getWgts(){ return paramSets; } + void weightGroup::addWgt( headWeight nuWgt ){ modded = true; paramSets.push_back( std::make_shared( nuWgt ) ); if( nuWgt.hasTag() ){ includeId = true; } } + void weightGroup::addWgt( std::shared_ptr nuWgt ){ modded = true; paramSets.push_back( nuWgt); if( nuWgt->hasTag() ){ includeId = true; }} + weightGroup::weightGroup() : xmlNode(){ name = 
"weightgroup"; return; } + weightGroup::weightGroup( std::vector> nuWgts ) : xmlNode(){ name = "weightgroup"; paramSets = nuWgts; for( auto wgt : nuWgts ){ if( wgt->hasTag() ){ includeId = true; } } } + weightGroup::weightGroup( std::vector nuWgts ) : xmlNode(){ name = "weightgroup"; for( auto wgt : nuWgts ){ paramSets.push_back( std::make_shared( wgt ) ); } for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup( xmlNode& wgtNode ) : xmlNode( wgtNode ){ + weightGroup::weightGroup( xmlNode& wgtNode ) : xmlNode( wgtNode ){ parser( true ); name = "weightgroup"; paramSets.reserve( children.size() ); @@ -885,7 +847,7 @@ namespace REX } for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + weightGroup::weightGroup( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ parser( true ); name = "weightgroup"; paramSets.reserve( children.size() ); @@ -894,7 +856,7 @@ namespace REX } for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + weightGroup::weightGroup( xmlTree& wgtTree ) : xmlNode( wgtTree ){ parser( true ); name = "weightgroup"; paramSets.reserve( children.size() ); @@ -903,7 +865,7 @@ namespace REX } for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ + weightGroup::weightGroup( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ parser( true ); name = "weightgroup"; paramSets.reserve( children.size() ); @@ -912,7 +874,7 @@ namespace REX } for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ + weightGroup::weightGroup( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ parser( true ); name = "weightgroup"; paramSets.reserve( children.size() ); @@ -921,7 +883,7 @@ namespace REX } for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = 
true; } } } - weightGroup( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + weightGroup::weightGroup( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) : xmlNode( originFile, begin, childs ){ name = "weightgroup"; if( parseTop() ){ @@ -934,69 +896,58 @@ namespace REX } } } - protected: - std::string_view rwgtName; - std::string_view wgtNamStrat; - bool includeId = false; - std::vector> paramSets; - bool nu; - std::string_view idTag; - int id; - void headWriter() override{ + void weightGroup::headWriter() { nodeHeader = "nodeWriter()); } } - void childWriter() override{ + void weightGroup::childWriter() { for(auto child : children){ if( child->getName() == "weight" ){ continue; } nodeContent += (*child->nodeWriter()); } } - void childWriter( bool hasChildren ){ + void weightGroup::childWriter( bool hasChildren ){ if( hasChildren ){ childWriter(); } return; } - void endWriter() override{ nodeEnd = "\n"; } - }; + void weightGroup::endWriter() { nodeEnd = "\n"; } - struct initRwgt : public xmlNode { - public: - std::vector> getGroups(){ return groups; } - size_t noGrps(){ return groups.size(); } - void addGroup( weightGroup nuGroup ){ + std::vector> initRwgt::getGroups(){ return groups; } + size_t initRwgt::noGrps(){ return groups.size(); } + void initRwgt::addGroup( weightGroup nuGroup ){ modded = true; auto nuGrpPtr = std::make_shared( nuGroup ); if( grpInit( nuGrpPtr ) ){ groups.push_back( std::make_shared( nuGroup ) ); } } - void addGroup( std::shared_ptr nuGroup ){ + void initRwgt::addGroup( std::shared_ptr nuGroup ){ modded = true; if( grpInit( nuGroup ) ){ groups.push_back( nuGroup ); } } - void addWgt( unsigned int index, std::shared_ptr nuWgt ){ + void initRwgt::addWgt( unsigned int index, std::shared_ptr nuWgt ){ if( index < groups.size() ){ modded = true; groups[index]->addWgt( nuWgt ); } else throw std::range_error( "Appending weight to uninitialised weightgroup." 
); } - void addWgt( unsigned int index, headWeight nuWgt ){ + void initRwgt::addWgt( unsigned int index, headWeight nuWgt ){ if( index < groups.size() ){ modded = true; groups[index]->addWgt( nuWgt ); } else throw std::range_error( "Appending weight to uninitialised weightgroup." ); } - initRwgt() : xmlNode(){ name = "initrwgt"; return; } - initRwgt( std::vector> nuGroups ) : xmlNode(){ + initRwgt::initRwgt() : xmlNode(){ name = "initrwgt"; return; } + initRwgt::initRwgt( std::vector> nuGroups ) : xmlNode(){ name = "initrwgt"; for( auto group : nuGroups ){ groups.push_back( std::make_shared( *group ) ); } } - initRwgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ + initRwgt::initRwgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ parser( true ); name = "initrwgt"; groups.reserve( children.size() ); @@ -1004,7 +955,7 @@ namespace REX groups.push_back( std::make_shared( *child ) ); } } - initRwgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + initRwgt::initRwgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ parser( true ); name = "initrwgt"; groups.reserve( children.size() ); @@ -1012,7 +963,7 @@ namespace REX groups.push_back( std::make_shared( *child ) ); } } - initRwgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ + initRwgt::initRwgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ parser( true ); name = "initrwgt"; groups.reserve( children.size() ); @@ -1020,7 +971,7 @@ namespace REX groups.push_back( std::make_shared( *child ) ); } } - initRwgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + initRwgt::initRwgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ parser( true ); name = "initrwgt"; groups.reserve( children.size() ); @@ -1028,9 +979,7 @@ namespace REX groups.push_back( std::make_shared( *child ) ); } } - protected: - bool grpIsInit = false; - bool grpInit( std::shared_ptr& wgt ){ + bool initRwgt::grpInit( std::shared_ptr& wgt ){ if( grpIsInit ){ return true; } else{ groups = std::vector>( 1, wgt ); @@ -1038,112 +987,103 @@ namespace REX return false; } } - std::vector> 
groups; - void contWriter() override{ + void initRwgt::contWriter(){ nodeContent = "\n"; for( auto group : groups ){ nodeContent += (*group->nodeWriter()); } } - void childWriter() override{ + void initRwgt::childWriter(){ for( auto child : children ){ if( child->getName() == "weightgroup" ){ continue; } nodeContent += (*child->nodeWriter()); } } - void childWriter( bool hasChildren ){ + void initRwgt::childWriter( bool hasChildren ){ if( hasChildren ){ childWriter(); } return; } - }; // ZW: struct for handling weights // in event blocks of LHE files - struct bodyWgt : public xmlNode { - public: - void setComment( std::string_view nuComment ){ modded = true; comment = nuComment; } - void setVal( std::string nuVal ){ modded = true; valS = nuVal; valD = std::stod(valS);} - void setVal( std::string_view nuVal ){ modded = true; valS = std::string(nuVal); valD = std::stod(valS);} - void setVal( double nuVal ){ modded = true; valD = nuVal; valS = std::to_string(valD);} - void setId( std::string nuId ){ + void bodyWgt::setComment( std::string_view nuComment ){ modded = true; comment = nuComment; } + void bodyWgt::setVal( std::string nuVal ){ modded = true; valS = nuVal; valD = std::stod(valS);} + void bodyWgt::setVal( std::string_view nuVal ){ modded = true; valS = std::string(nuVal); valD = std::stod(valS);} + void bodyWgt::setVal( double nuVal ){ modded = true; valD = nuVal; valS = std::to_string(valD);} + void bodyWgt::setId( std::string nuId ){ modded = true; id = nuId; for( auto tag : tags ){ if( tag->getId() == "id" ){ tag->setVal( id ); return; } } addTag( std::make_shared( "id", id ) ); } - void setModded( bool nuModded ){ modded = nuModded; } - std::string_view getComment(){ return comment; } - std::string_view getValS(){ return valS; } - double getValD(){ return valD; } - bodyWgt() : xmlNode(){ return; } - bodyWgt( std::string_view value ) : xmlNode() { setVal( value ); modded = false; } - bodyWgt( double value ) : xmlNode() { setVal( value ); modded = false; } 
- bodyWgt( std::string_view value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } - bodyWgt( double value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } - bodyWgt( std::string_view value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } - bodyWgt( double value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } - bodyWgt( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + void bodyWgt::setModded( bool nuModded ){ modded = nuModded; } + std::string_view bodyWgt::getComment(){ return comment; } + std::string_view bodyWgt::getValS(){ return valS; } + double bodyWgt::getValD(){ return valD; } + bodyWgt::bodyWgt() : xmlNode(){ return; } + bodyWgt::bodyWgt( std::string_view value ) : xmlNode() { setVal( value ); modded = false; } + bodyWgt::bodyWgt( double value ) : xmlNode() { setVal( value ); modded = false; } + bodyWgt::bodyWgt( std::string_view value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } + bodyWgt::bodyWgt( double value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } + bodyWgt::bodyWgt( std::string_view value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } + bodyWgt::bodyWgt( double value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } + bodyWgt::bodyWgt( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) : xmlNode( originFile, begin, childs ){ auto strtPt = originFile.find_first_not_of(" >+", originFile.find(">", begin)+1); valS = originFile.substr( strtPt, originFile.find(" ", strtPt) - strtPt ); valD = std::stod( valS ); } - bodyWgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ + bodyWgt::bodyWgt( 
xmlNode& wgtNode ) : xmlNode( wgtNode ){ parser( true ); valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); valD = std::stod( valS ); } - bodyWgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + bodyWgt::bodyWgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ parser( true ); valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); valD = std::stod( valS ); } - bodyWgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ + bodyWgt::bodyWgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ parser( true ); valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); valD = std::stod( valS ); } - bodyWgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + bodyWgt::bodyWgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ parser( true ); valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); valD = std::stod( valS ); } - bodyWgt( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ + bodyWgt::bodyWgt( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ parser( true ); valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); valD = std::stod( valS ); } - bodyWgt( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ + bodyWgt::bodyWgt( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ parser( true ); valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); valD = std::stod( valS ); } - bodyWgt( double value, std::string& idTag ){ + bodyWgt::bodyWgt( double value, std::string& idTag ){ setVal( value ); id = idTag; addTag( std::make_shared("id",id) ); } - void appendWgt( std::shared_ptr document ){ + void bodyWgt::appendWgt( std::shared_ptr document ){ if( !isWritten() ){ fullWriter(); } *document += *writtenSelf; } - void appendWgt( std::string* document ){ + void bodyWgt::appendWgt( std::string* document ){ if( !isWritten() ){ fullWriter(); } *document += 
*writtenSelf; } - std::shared_ptr appendWgt( std::string_view document ){ + std::shared_ptr bodyWgt::appendWgt( std::string_view document ){ if(!isWritten() ){ fullWriter(); } auto retDoc = std::make_shared( document ); *retDoc += *writtenSelf; return retDoc; } - protected: - std::string_view comment; - std::string valS; - std::string id; - double valD; - void fullWriter() override { + void bodyWgt::fullWriter() { writtenSelf = std::make_shared( "getId()) + "=\"" + std::string(tag->getVal()) + "\""; @@ -1152,7 +1092,6 @@ namespace REX modded = false; written = true; } - }; // ZW: fcn for finding the next block in SLHA format // parameter cards @@ -1199,30 +1138,29 @@ namespace REX // ZW: struct for handling the first line of // LHE format event block - struct evHead { - public: - std::string_view getComment(){ return comment; } - std::string_view getWeight(){ return weight; } - std::string_view getScale(){ return scale; } - std::string_view getAQED(){ return aqed; } - std::string_view getAQCD(){ return aqcd; } - std::string_view getNprt(){ return nprt; } - std::string_view getProcID(){ return procid; } - bool isModded(){ return modded; } - bool isWritten(){ return written; } - void setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } - void setWeight( std::string_view nuWgt ){ modded = true; weight = nuWgt; } - void setScale( std::string_view nuScale ){ modded = true; scale = nuScale; } - void setAQED( std::string_view nuAQED ){ modded = true; aqed = nuAQED; } - void setAQCD( std::string_view nuAQCD ){ modded = true; aqcd = nuAQCD; } - void setNprt( std::string_view nuNprt ){ modded = true; nprt = nuNprt; } - void setProcID( std::string_view nuProcID ){ modded = true; procid = nuProcID; } - std::shared_ptr getContent(){ + std::string_view evHead::getComment(){ return comment; } + std::string_view evHead::getWeight(){ return weight; } + std::string_view evHead::getScale(){ return scale; } + std::string_view evHead::getAQED(){ return aqed; } + 
std::string_view evHead::getAQCD(){ return aqcd; } + std::string_view evHead::getNprt(){ return nprt; } + std::string_view evHead::getProcID(){ return procid; } + bool evHead::isModded(){ return modded; } + bool evHead::isWritten(){ return written; } + void evHead::setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } + void evHead::setWeight( std::string_view nuWgt ){ modded = true; weight = nuWgt; } + void evHead::setScale( std::string_view nuScale ){ modded = true; scale = nuScale; } + void evHead::setAQED( std::string_view nuAQED ){ modded = true; aqed = nuAQED; } + void evHead::setAQCD( std::string_view nuAQCD ){ modded = true; aqcd = nuAQCD; } + void evHead::setNprt( std::string_view nuNprt ){ modded = true; nprt = nuNprt; } + void evHead::setNprt( int nuNprt ){ modded = true; nprtint = nuNprt; nprtstr = std::to_string(nuNprt); nprt = nprtstr;} + void evHead::setProcID( std::string_view nuProcID ){ modded = true; procid = nuProcID; } + std::shared_ptr evHead::getContent(){ if( !isWritten() || isModded() ){ writer(); } return content; } - evHead(){ return; } - evHead( const std::string_view originFile, size_t beginLine = 0, size_t endLine = npos ) + evHead::evHead(){ return; } + evHead::evHead( const std::string_view originFile, size_t beginLine, size_t endLine ) { if( originFile.size() == 0){ return; } beginLine = originFile.find_first_not_of("\n \r\f\t\v", beginLine); @@ -1236,19 +1174,7 @@ namespace REX aqed = evLine->at(4); aqcd = evLine->at(5); } - protected: - std::shared_ptr content; - std::string_view sourceFile; - std::string_view comment; - std::string_view weight; - std::string_view scale; - std::string_view aqed; - std::string_view aqcd; - std::string_view nprt; - std::string_view procid; - bool modded = false; - bool written = false; - void writer(){ + void evHead::writer(){ if( isWritten() && !isModded() ){ return; } if( !isModded() ){ content = std::make_shared( sourceFile ); return; } auto retText = std::make_shared( " " ); @@ 
-1260,49 +1186,46 @@ namespace REX modded = false; written = true; } - }; // ZW: struct for handling particle lines // in LHE format event block - struct lhePrt{ - public: - std::string_view getLine(){ return sourceFile; } - std::string_view getComment(){ return comment; } - std::vector getMom(){ return std::vector( std::begin( mom ), std::end( mom ) ); } - std::string_view getE(){ return energy; } - std::string_view getMass(){ return mass; } - std::string_view getVTim(){ return vtim; } - std::string_view getSpin(){ return spin; } - std::string_view getPDG(){ return pdg; } - std::string_view getStatus(){ return status; } - std::vector getMothers(){ return std::vector( std::begin( mothers ), std::end( mothers ) ); } - std::vector getColor(){ return std::vector( std::begin( icol ), std::end( icol ) ); } - void setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } - void setMom( std::vector nuMom ){ modded = true; mom[0] = nuMom[0]; mom[1] = nuMom[1]; mom[2] = nuMom[2]; } - void setEnergy( std::string_view nuE ){ modded = true; energy = nuE; } - void setMass( std::string_view nuM ){ modded = true; mass = nuM; } - void setVTim( std::string_view nuVTim ){ modded = true; vtim = nuVTim; } - void setSpin( std::string_view nuSpin ){ modded = true; spin = nuSpin; } - void setPDG( std::string_view nuPDG ){ modded = true; pdg = nuPDG; } - void setStatus( std::string_view nuSt ){ modded = true; status = nuSt; } - void setMothers( std::vector nuMum ){ modded = true; mothers[0] = nuMum[0]; mothers[1] = nuMum[1]; } - void setColors( std::vector nuCol ){ modded = true; icol[0] = nuCol[0]; icol[1] = nuCol[1]; } - bool isModded(){ return modded; } - bool isWritten(){ return written; } - std::shared_ptr getContent(){ + std::string_view lhePrt::getLine(){ return sourceFile; } + std::string_view lhePrt::getComment(){ return comment; } + std::vector lhePrt::getMom(){ return std::vector( std::begin( mom ), std::end( mom ) ); } + std::string_view lhePrt::getE(){ return 
energy; } + std::string_view lhePrt::getMass(){ return mass; } + std::string_view lhePrt::getVTim(){ return vtim; } + std::string_view lhePrt::getSpin(){ return spin; } + std::string_view lhePrt::getPDG(){ return pdg; } + std::string_view lhePrt::getStatus(){ return status; } + std::vector lhePrt::getMothers(){ return std::vector( std::begin( mothers ), std::end( mothers ) ); } + std::vector lhePrt::getColor(){ return std::vector( std::begin( icol ), std::end( icol ) ); } + void lhePrt::setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } + void lhePrt::setMom( std::vector nuMom ){ modded = true; mom[0] = nuMom[0]; mom[1] = nuMom[1]; mom[2] = nuMom[2]; } + void lhePrt::setEnergy( std::string_view nuE ){ modded = true; energy = nuE; } + void lhePrt::setMass( std::string_view nuM ){ modded = true; mass = nuM; } + void lhePrt::setVTim( std::string_view nuVTim ){ modded = true; vtim = nuVTim; } + void lhePrt::setSpin( std::string_view nuSpin ){ modded = true; spin = nuSpin; } + void lhePrt::setPDG( std::string_view nuPDG ){ modded = true; pdg = nuPDG; } + void lhePrt::setStatus( std::string_view nuSt ){ modded = true; status = nuSt; } + void lhePrt::setMothers( std::vector nuMum ){ modded = true; mothers[0] = nuMum[0]; mothers[1] = nuMum[1]; } + void lhePrt::setColors( std::vector nuCol ){ modded = true; icol[0] = nuCol[0]; icol[1] = nuCol[1]; } + bool lhePrt::isModded(){ return modded; } + bool lhePrt::isWritten(){ return written; } + std::shared_ptr lhePrt::getContent(){ if( !isWritten() || isModded() ){ writer(); } return content; } - lhePrt(){ return; } - // lhePrt( std::pair prtInfo ){ - // status = std::to_string( prtInfo.first ); - // pdg = std::to_string( prtInfo.second ); - // } - lhePrt( std::pair& prtInfo ){ + lhePrt::lhePrt(){ return; } + lhePrt::lhePrt( std::pair& prtInfo ){ status = std::to_string( prtInfo.first ); pdg = std::to_string( prtInfo.second ); } - lhePrt( const std::string_view originFile, const size_t& beginLine = 0, const 
size_t& endLine = npos ) + lhePrt::lhePrt( std::pair& prtInfo ){ + status = std::string_view( prtInfo.first ); + pdg = std::string_view( prtInfo.second ); + } + lhePrt::lhePrt( const std::string_view originFile, const size_t& beginLine, const size_t& endLine ) { sourceFile = originFile.substr( beginLine, endLine - beginLine ); auto evLine = nuWordSplitter( sourceFile ); @@ -1319,22 +1242,7 @@ namespace REX spin = evLine->at(12); if( evLine->size() > 13 ){ comment = sourceFile.substr( sourceFile.find( "#" ) ); } } - protected: - std::shared_ptr content; - std::string_view sourceFile; - std::string_view comment; - std::string_view mom[3]; - std::string_view energy; - std::string_view mass; - std::string_view vtim; - std::string_view spin; - std::string_view pdg; - std::string_view status; - std::string_view mothers[2]; - std::string_view icol[2]; - bool modded = false; - bool written = false; - void writer(){ + void lhePrt::writer(){ if( isWritten() && !isModded() ){ return; } if( !isModded() ){ content = std::make_shared( sourceFile ); return; } *content = ""; @@ -1349,26 +1257,23 @@ namespace REX modded = false; written = true; } - }; // ZW: struct for handling LHE format event block - struct event : public xmlNode { - public: - evHead getHead(){ return header; } - std::vector> getPrts(){ return prts; } - std::vector> getWgts(){ return rwgt; } - void setHead( evHead head ){ modded = true; header = head; } - void addPrt( std::shared_ptr prtcl ){ modded = true; prts.push_back( prtcl ); } - void addPrt( lhePrt prtcl ){ modded = true; prts.push_back( std::make_shared(prtcl) ); } - void setPrts( std::vector> prtcls ){ modded = true; prts = prtcls; } - void addWgt( bodyWgt nuWgt ){ addedWgt = true; rwgt.push_back( std::make_shared(nuWgt) ); } - void addWgt( std::shared_ptr nuWgt ){ modded = true; rwgt.push_back( nuWgt ); } - void addWgt( bodyWgt nuWgt, std::string& id ){ addedWgt = true; nuWgt.setId( id ); rwgt.push_back( std::make_shared(nuWgt) ); } - void addWgt( 
std::shared_ptr nuWgt, std::string& id ){ modded = true; nuWgt->setId( id ); rwgt.push_back( nuWgt ); } - bool newWeight(){ return addedWgt; } - int getNprt(){ return prts.size(); } - bool isModded() override{ return modded; } - bool isModded( bool deep ) override { + evHead event::getHead(){ return header; } + std::vector> event::getPrts(){ return prts; } + std::vector> event::getWgts(){ return rwgt; } + void event::setHead( evHead head ){ modded = true; header = head; } + void event::addPrt( std::shared_ptr prtcl ){ modded = true; prts.push_back( prtcl ); } + void event::addPrt( lhePrt prtcl ){ modded = true; prts.push_back( std::make_shared(prtcl) ); } + void event::setPrts( std::vector> prtcls ){ modded = true; prts = prtcls; } + void event::addWgt( bodyWgt nuWgt ){ addedWgt = true; rwgt.push_back( std::make_shared(nuWgt) ); } + void event::addWgt( std::shared_ptr nuWgt ){ modded = true; rwgt.push_back( nuWgt ); } + void event::addWgt( bodyWgt nuWgt, std::string& id ){ addedWgt = true; nuWgt.setId( id ); rwgt.push_back( std::make_shared(nuWgt) ); } + void event::addWgt( std::shared_ptr nuWgt, std::string& id ){ modded = true; nuWgt->setId( id ); rwgt.push_back( nuWgt ); } + bool event::newWeight(){ return addedWgt; } + int event::getNprt(){ return prts.size(); } + bool event::isModded() { return modded; } + bool event::isModded( bool deep ) { if( !deep ){ return modded; } bool modStat = modded; for( auto child : children ){ if(modStat){ return modStat; }; modStat = (modStat || child->isModded( deep )); } @@ -1377,18 +1282,24 @@ namespace REX for( auto wgt : rwgt ){ if(modStat){ return modStat; }; modStat = (modStat || wgt->isModded()); } return modStat; } - event(){ return; } - event( std::vector>& prtInfo ){ + event::event(){ return; } + event::event( std::vector>& prtInfo ){ header.setNprt( std::to_string( prtInfo.size() ) ); for( auto& prt : prtInfo ){ prts.push_back( std::make_shared( prt ) ); } } - event( std::vector> prtInfo ){ + event::event( 
std::vector>& prtInfo ){ + header.setNprt( prtInfo.size() ); + for( auto& prt : prtInfo ){ + prts.push_back( std::make_shared( prt ) ); + } + } + event::event( std::vector> prtInfo ){ header.setNprt( std::to_string( prtInfo.size() ) ); prts = prtInfo; } - event( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + event::event( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) : xmlNode(originFile, begin, childs) { xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" \n\r\f\t\v", begin+1); if( trueStart == npos ){ return; } @@ -1400,7 +1311,7 @@ namespace REX prts.push_back( std::make_shared(originFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart + 1) ); } } - event( const xmlNode& originFile ) + event::event( const xmlNode& originFile ) : xmlNode( originFile ) { size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", start+1); auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); @@ -1411,7 +1322,7 @@ namespace REX prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); } } - event( const xmlNode* originFile ) + event::event( const xmlNode* originFile ) : xmlNode( *originFile ) { size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); @@ -1422,7 +1333,7 @@ namespace REX prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); } } - event( const std::shared_ptr& originFile ) + event::event( const std::shared_ptr& originFile ) : xmlNode( *originFile ) { size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); @@ 
-1433,7 +1344,7 @@ namespace REX prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); } } - event( xmlTree& originFile ) + event::event( xmlTree& originFile ) : xmlNode( originFile ) { size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); @@ -1444,7 +1355,7 @@ namespace REX prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); } } - event( xmlTree* originFile ) + event::event( xmlTree* originFile ) : xmlNode( *originFile ) { size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); @@ -1455,7 +1366,7 @@ namespace REX prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); } } - event( std::shared_ptr originFile ) + event::event( std::shared_ptr originFile ) : xmlNode( *originFile ) { size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); @@ -1466,39 +1377,28 @@ namespace REX prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); } } - bool prtsAreMod(){ + bool event::prtsAreMod(){ for( auto prt : prts ){ if( prt->isModded() ){ return true; } } return false; } - bool headIsMod(){ + bool event::headIsMod(){ return header.isModded(); } - bool isSpecSort() const { return specSorted; } - sortFcn getSortFcn() const { return eventSort; } - statSort getStatSort() const { return specSort; } - protected: - std::vector> rwgt; - std::shared_ptr childRwgt; - bool hasRwgt(){ + bool event::isSpecSort() const { return specSorted; } + sortFcn event::getSortFcn() const { return 
eventSort; } + statSort event::getStatSort() const { return specSort; } + bool event::hasRwgt(){ if( rwgt.size() > 0 ){ return true; } return false; } - bool rwgtChild(){ + bool event::rwgtChild(){ if( childRwgt != nullptr ){ return true; } for( auto child : children ){ if( clStringComp(child->getName(), std::string("rwgt") ) ){ childRwgt = child; return true; } } return false; } - bool bothRwgt(){ return (hasRwgt() && rwgtChild() ); } - bool eitherRwgt(){ return (hasRwgt() || rwgtChild() ); } - evHead header; - bool hasBeenProc = false; - std::vector> prts; - std::map> procMap; - std::map> procOrder; - sortFcn eventSort = []( std::vector vec ){ return stodSort( vec ); }; - statSort specSort = []( std::string_view stat, std::vector vec ){ return stodSort( vec ); }; - bool specSorted = false; - bool initProcMap(bool hard = false) + bool event::bothRwgt(){ return (hasRwgt() && rwgtChild() ); } + bool event::eitherRwgt(){ return (hasRwgt() || rwgtChild() ); } + bool event::initProcMap(bool hard) { if(!hard){ if( procMap.size() > 0 ){ return true; } } for( auto prt : prts ){ @@ -1514,7 +1414,7 @@ namespace REX hasBeenProc = true; return true; } - bool initProcMap( sortFcn sorter, bool hard = false ) + bool event::initProcMap( sortFcn sorter, bool hard ) { if(!hard){ if( procMap.size() > 0 ){ return true; } } specSorted = false; @@ -1532,7 +1432,7 @@ namespace REX hasBeenProc = true; return true; } - bool initProcMap( statSort sorter, bool hard = false ) + bool event::initProcMap( statSort sorter, bool hard ) { if(!hard){ if( procMap.size() > 0 ){ return true; } } specSorted = true; @@ -1550,29 +1450,29 @@ namespace REX hasBeenProc = true; return true; } - bool inRwgtChild( std::string_view name ){ + bool event::inRwgtChild( std::string_view name ){ for( auto child : childRwgt->getChildren() ){ for( auto tag : child->getTags() ){ if(clStringComp(tag->getVal(), name)){ return true; } } } return false; } - bool checkRwgtOverlap(){ + bool event::checkRwgtOverlap(){ for( 
auto wgt : rwgt ){ for( auto tag : wgt->getTags() ){ if( inRwgtChild( tag->getVal() ) ){ return true; } } } return false; } - void childRwgtWriter(){ + void event::childRwgtWriter(){ if( rwgtChild() ){ nodeContent += *childRwgt->nodeWriter(); } } - void vecRwgtWriter( bool midNode = false ){ + void event::vecRwgtWriter( bool midNode ){ if( !midNode ){ nodeContent += "\n"; } for( auto wgt : rwgt ){ nodeContent += *wgt->nodeWriter(); } nodeContent += "\n"; } - void rwgtWriter(){ + void event::rwgtWriter(){ if( bothRwgt() ){ if( checkRwgtOverlap() ){ childRwgtWriter(); return; } childRwgtWriter(); nodeContent.erase( nodeContent.size() - 8, 8 ); @@ -1583,20 +1483,19 @@ namespace REX if( rwgtChild() ){ childRwgtWriter(); return; } } } - void contWriter() override { + void event::contWriter() { nodeContent = "\n" + *header.getContent(); for( auto prt : prts ){ nodeContent += *prt->getContent(); } } - void childWriter() override { + void event::childWriter() { for( auto child : children ){ if( clStringComp( child->getName(), std::string("wgt") ) ){ continue; } nodeContent += *child->nodeWriter(); } } - bool addedWgt = false; - void fullWriter() override { + void event::fullWriter() { if( isModded( false ) ){ headWriter(); contWriter(); @@ -1610,7 +1509,7 @@ namespace REX written = true; } } - void fullWriter( bool deep ){ + void event::fullWriter( bool deep ){ if( !deep ){ fullWriter(); return; } if( isModded( true ) ){ headWriter(); @@ -1626,7 +1525,7 @@ namespace REX written = true; } } - void appendWgts(){ + void event::appendWgts(){ if( !addedWgt ){ return; } writtenSelf->erase( writtenSelf->size() - 17, 17 ); for( auto wgt : rwgt ){ @@ -1634,67 +1533,66 @@ namespace REX } *writtenSelf += "\n
\n"; } - public: - std::shared_ptr nodeWriter() override { + std::shared_ptr event::nodeWriter() { if( isModded(false) || !isWritten() ){ fullWriter(); return writtenSelf; } if( addedWgt ){ appendWgts(); } return writtenSelf; } - std::shared_ptr nodeWriter( bool recursive ){ + std::shared_ptr event::nodeWriter( bool recursive ){ if( isModded( recursive ) || !isWritten() ){ fullWriter(); return writtenSelf; } if( addedWgt ){ appendWgts(); } return writtenSelf; } - auto &getProc(){ + std::map> &event::getProc(){ if( initProcMap() ){ return procMap; } else throw std::runtime_error("Error while parsing event node."); } - auto &getProcOrder(){ + std::map> &event::getProcOrder(){ if( initProcMap() ){ return procOrder; } else throw std::runtime_error("Error while parsing event node."); } - auto &getProc() const{ + std::map> event::getProc() const { if ( hasBeenProc ){ return procMap; } else throw std::runtime_error("Const declaration of event node before it has been procesed."); } - auto &getProcOrder() const{ + std::map> event::getProcOrder() const { if ( hasBeenProc ){ return procOrder; } else throw std::runtime_error("Const declaration of event node before it has been procesed."); } - auto &getProc(sortFcn sorter){ + std::map> &event::getProc(sortFcn sorter){ if( initProcMap(sorter) ){ return procMap; } else throw std::runtime_error("Error while parsing event node."); } - auto &getProcOrder(sortFcn sorter){ + std::map> &event::getProcOrder(sortFcn sorter){ if( initProcMap(sorter) ){ return procOrder; } else throw std::runtime_error("Error while parsing event node."); } - auto &getProc(statSort sorter){ + std::map> &event::getProc(statSort sorter){ if( initProcMap(sorter) ){ return procMap; } else throw std::runtime_error("Error while parsing event node."); } - auto &getProcOrder(statSort sorter){ + std::map> &event::getProcOrder(statSort sorter){ if( initProcMap(sorter) ){ return procOrder; } else throw std::runtime_error("Error while parsing event node."); } - }; + + 
event& makeEv( std::vector>& particles ){ + auto returnEvent = event( particles ); + return returnEvent; + } + + std::vector> getParticles( event& ev ){ + return ev.getPrts(); + } // ZW: struct for handling the first line of // LHE format init tag - struct lheInitHead{ - public: - std::string_view idbmup[2]; - std::string_view ebmup[2]; - std::string_view pdfgup[2]; - std::string_view pdfsup[2]; - std::string_view idwtup; - std::string_view nprup; - bool isWritten(){ return written; } - bool isModded(){ return modded; } - std::shared_ptr getContent(){ + bool lheInitHead::isWritten(){ return written; } + bool lheInitHead::isModded(){ return modded; } + std::shared_ptr lheInitHead::getContent(){ if( isModded() || !isWritten() ){ writer(); } return content; } - lheInitHead( std::string_view initHead ){ + lheInitHead::lheInitHead( std::string_view initHead ){ auto vals = *nuBlankSplitter( initHead ); if( vals.size() < 10 ){ return; } idbmup[0] = vals[0]; idbmup[1] = vals[1]; @@ -1703,7 +1601,7 @@ namespace REX pdfsup[0] = vals[6]; pdfsup[1] = vals[7]; idwtup = vals[8]; nprup = vals[9]; } - lheInitHead( xmlNode& initNode ) + lheInitHead::lheInitHead( xmlNode& initNode ) { if( initNode.getName() != "init" ){ return; } auto startPos = initNode.getFile().find( ">", initNode.getStart() ) + 1; @@ -1715,33 +1613,22 @@ namespace REX pdfsup[0] = vals[6]; pdfsup[1] = vals[7]; idwtup = vals[8]; nprup = vals[9]; } - protected: - std::shared_ptr content; - bool written = false; - bool modded = false; - void writer(){ + void lheInitHead::writer(){ *content = std::string(idbmup[0]) + " " + std::string(idbmup[1]) + " " + std::string(ebmup[0]) + " " + std::string(ebmup[1]) + " " + std::string(pdfgup[0]) + " " + std::string(pdfgup[1]) + " " + std::string(pdfsup[0]) + " " + std::string(pdfsup[1]) + " " + std::string(idwtup) + " " + std::string(nprup) +"\n"; written = true; modded = false; } - }; // ZW: struct for handling process lines // in LHE format init tag - struct lheInitLine { - 
public: - std::string_view xsecup; - std::string_view xerrup; - std::string_view xmaxup; - std::string_view lprup; - bool isWritten(){ return written; } - bool isModded(){ return modded; } - std::shared_ptr getContent(){ + bool lheInitLine::isWritten(){ return written; } + bool lheInitLine::isModded(){ return modded; } + std::shared_ptr lheInitLine::getContent(){ if( isModded() || !isWritten() ){ writer(); } return content; } - lheInitLine(){} - lheInitLine( std::string_view procLine ) + lheInitLine::lheInitLine(){} + lheInitLine::lheInitLine( std::string_view procLine ) { auto vals = *nuBlankSplitter( procLine ); if( vals.size() < 4 ){ return; } @@ -1750,33 +1637,20 @@ namespace REX xmaxup = vals[2]; lprup = vals[3]; } - protected: - std::shared_ptr content; - bool written = false; - bool modded = false; - void writer(){ + void lheInitLine::writer(){ *content = std::string(xsecup) + " " + std::string(xerrup) + " " + std::string(xmaxup) + " " + std::string(lprup) + "\n"; written = true; modded = false; } - }; // ZW: struct for handling single parameter line in // SLHA format parameter card - struct paramVal{ - public: - double value = 0; - int id = 0; - std::string_view realLine; - std::string_view comment; - std::string_view idStr; - std::string_view valStr; - virtual void parse(){ + void paramVal::parse(){ id = std::stoi( std::string(idStr) ); value = std::stod( std::string(valStr) ); } - paramVal(){ realLine = ""; idStr = ""; valStr = ""; } - paramVal( std::string_view paramLine, bool parseOnline = false ) + paramVal::paramVal(){ realLine = ""; idStr = ""; valStr = ""; } + paramVal::paramVal( std::string_view paramLine, bool parseOnline ) { if( paramLine.find("\n") != npos ){ auto startPos = paramLine.find_first_not_of(" \n", paramLine.find("\n")); @@ -1800,9 +1674,8 @@ namespace REX } parse(); } } - bool isMod(){ return modded; } - bool modded = false; - virtual std::shared_ptr selfWrite(){ + bool paramVal::isMod(){ return modded; } + std::shared_ptr 
paramVal::selfWrite(){ auto writeVal = std::make_shared(""); if( isMod() ) { @@ -1816,13 +1689,10 @@ namespace REX else{ *writeVal = std::string( realLine ) + "\n"; } return writeVal; } - }; // ZW: struct for handling single DECAY line // in SLHA format parameter card - struct decVal : public paramVal{ - public: - void parse() override { + void decVal::parse() { auto vals = *nuBlankSplitter( realLine ); id = std::stoi( std::string(vals[1]) ); value = std::stod( std::string(vals[2]) ); @@ -1832,11 +1702,11 @@ namespace REX comment = realLine.substr( comStart, realLine.find("\n", comStart) - comStart ); } } - decVal( std::string_view paramLine = "", bool parseOnline = false ) : paramVal( paramLine, false ) + decVal::decVal( std::string_view paramLine, bool parseOnline ) : paramVal( paramLine, false ) { if( parseOnline ){ parse(); } } - std::shared_ptr selfWrite() override { + std::shared_ptr decVal::selfWrite() { auto writeVal = std::make_shared(""); if( isMod() ) { @@ -1849,19 +1719,10 @@ namespace REX else{ *writeVal = std::string( realLine ) + "\n"; } return writeVal; } - }; // ZW: struct for handling parameter block // in SLHA format parameter card - struct paramBlock { - public: - std::string_view realBlock; - size_t startPt; - std::string_view comment; - std::string_view initComm; - std::string_view name; - std::vector params; - virtual void parse( bool parseOnline = false ){ + void paramBlock::parse( bool parseOnline ){ if( realBlock.size() == 0 ){ return; } if( !(clStringComp(realBlock.substr(startPt+1, 5), std::string("block"))) ){ startPt = clStringFind( realBlock, std::string("\nblock") ); } auto namePt = realBlock.find_first_not_of( " ", startPt + 7 ); @@ -1875,16 +1736,15 @@ namespace REX params.push_back( paramVal( line, parseOnline ) ); } } - paramBlock(){ return; } - paramBlock( std::string_view paramSet, bool parseOnline = false ) + paramBlock::paramBlock(){ return; } + paramBlock::paramBlock( std::string_view paramSet, bool parseOnline ) { realBlock 
= paramSet; startPt = clStringFind( realBlock, std::string("\nB") ); if( parseOnline ){ parse(parseOnline); } } - bool isMod(){ return modded; } - bool modded = false; - virtual std::shared_ptr selfWrite(){ + bool paramBlock::isMod(){ return modded; } + std::shared_ptr paramBlock::selfWrite(){ auto writeBlock = std::make_shared(""); if( isMod() ) { @@ -1905,14 +1765,10 @@ namespace REX } } return writeBlock; } - }; // ZW: struct for handling DECAY lines // in SLHA format parameter card - struct decBlock : public paramBlock { - public: - std::vector decays; - void parse( bool parseOnline = false ) override{ + void decBlock::parse( bool parseOnline ){ if( realBlock.size() == 0 ){ return; } auto decLines = clFindEach( realBlock, std::string("\ndecay") ); decays.reserve(decLines->size()); @@ -1925,7 +1781,7 @@ namespace REX decays.push_back( decVal( realBlock.substr( pts + 1, lineBr - pts - 1 ), parseOnline ) ); } } - void parse( std::shared_ptr> decLines, bool parseOnline = false ) { + void decBlock::parse( std::shared_ptr> decLines, bool parseOnline ) { decays.reserve(decLines->size()); if( realBlock.size() > 5 ){ if( clStringComp( realBlock.substr(0,5), std::string("decay")) ) { decays.push_back( decVal(realBlock.substr( 0, realBlock.find("\n") ), parseOnline) ); } } @@ -1936,12 +1792,12 @@ namespace REX decays.push_back( decVal( realBlock.substr( pts + 1, lineBr - pts - 1 ), parseOnline ) ); } } - decBlock( std::string_view paramSet = "", bool parseOnline = false ) : paramBlock( paramSet, parseOnline ) + decBlock::decBlock( std::string_view paramSet, bool parseOnline ) : paramBlock( paramSet, parseOnline ) { realBlock = paramSet; if( parseOnline ){ parse(parseOnline); } } - std::shared_ptr selfWrite() override { + std::shared_ptr decBlock::selfWrite() { auto writeBlock = std::make_shared(""); *writeBlock += "\n"; for ( auto val : decays ) @@ -1950,27 +1806,9 @@ namespace REX } return writeBlock; } - }; // ZW: struct for handling SLHA parameter cards - struct 
lesHouchesCard { - public: - decBlock decays; - std::string_view xmlFile; - size_t start; - size_t end; - bool modded; - bool parsed; - std::string_view header; - std::vector blocks; - size_t blockStart; - std::function lambda = [&]( size_t& conPt, const std::string_view& file ) - { return !( file[conPt+1] == ' ' || file[conPt+1] == '#' || file[conPt+1] == '\n' ); }; - std::function lambdaNu = [&]( size_t& conPt, const std::string_view& file ) - { return !( file[conPt+1] == ' ' || file[conPt+1] == '\n' || file[conPt+1] == '<'); }; - std::function lambdaD = [&]( size_t& conPt, const std::string_view& file ) - { return !( clStringComp(file.substr(conPt+1, 1), std::string("d") ) ); }; - void parse( bool parseOnline = false ) + void lesHouchesCard::parse( bool parseOnline ) { if( parsed ){ return; } if( xmlFile.substr(start,1).find_first_of("BbDd#") == npos ){ start = clStringFindIf( xmlFile, std::string("\n"), lambdaNu ); } @@ -1987,14 +1825,14 @@ namespace REX decays.parse( decLines, parseOnline ); parsed = true; } - lesHouchesCard( const std::string_view originFile = "", const size_t& begin = 0, bool parseOnline = false ){ + lesHouchesCard::lesHouchesCard( const std::string_view originFile, const size_t& begin, bool parseOnline ){ xmlFile = originFile; start = begin; modded = false; blockStart = clStringFindIf( xmlFile, std::string("\n"), lambda, start + 1); end = xmlFile.find(" selfWrite(){ + bool lesHouchesCard::isMod(){ return modded; } + std::shared_ptr lesHouchesCard::selfWrite(){ auto writeCard = std::make_shared(header); if( isMod() ) { for( auto block : blocks ) @@ -2006,106 +1844,94 @@ namespace REX } return writeCard; } - }; - struct slhaNode : public xmlNode { - public: - std::shared_ptr getParameters(){ + std::shared_ptr slhaNode::getParameters(){ modded = true; return parameterCard; } - slhaNode() : xmlNode(){} - slhaNode( lesHouchesCard parameters ) : xmlNode(){ + slhaNode::slhaNode() : xmlNode(){} + slhaNode::slhaNode( lesHouchesCard parameters ) : 
xmlNode(){ parameterCard = std::make_shared( parameters ); pCardInit = true; } - slhaNode( std::shared_ptr parameters ) : xmlNode(){ + slhaNode::slhaNode( std::shared_ptr parameters ) : xmlNode(){ parameterCard = parameters; pCardInit = true; } - slhaNode( xmlNode& node, bool parseOnline = false ) : xmlNode( node ){ + slhaNode::slhaNode( xmlNode& node, bool parseOnline ) : xmlNode( node ){ parameterCard = std::make_shared( node.getFile(), node.getStart(), parseOnline ); } - slhaNode( xmlNode* node, bool parseOnline = false ) : xmlNode( *node ){ + slhaNode::slhaNode( xmlNode* node, bool parseOnline ) : xmlNode( *node ){ parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); } - slhaNode( std::shared_ptr node, bool parseOnline = false ) : xmlNode( *node ){ + slhaNode::slhaNode( std::shared_ptr node, bool parseOnline ) : xmlNode( *node ){ parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); } - slhaNode( xmlTree tree, bool parseOnline = false ) : xmlNode( tree ){ + slhaNode::slhaNode( xmlTree tree, bool parseOnline ) : xmlNode( tree ){ parameterCard = std::make_shared( tree.getOrigin(), tree.getStart(), parseOnline ); } - slhaNode( std::shared_ptr tree, bool parseOnline = false ) : xmlNode( *tree ){ + slhaNode::slhaNode( std::shared_ptr tree, bool parseOnline ) : xmlNode( *tree ){ parameterCard = std::make_shared( tree->getOrigin(), tree->getStart(), parseOnline ); } - slhaNode( xmlTree* tree, bool parseOnline = false ) : xmlNode( *tree ){ + slhaNode::slhaNode( xmlTree* tree, bool parseOnline ) : xmlNode( *tree ){ parameterCard = std::make_shared( tree->getOrigin(), tree->getStart(), parseOnline ); } - slhaNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ) + slhaNode::slhaNode( const std::string_view originFile, const size_t& begin, bool parseOnline ) : xmlNode( originFile, begin ){ if( parse() ){ parameterCard = std::make_shared( content, begin, parseOnline ); 
pCardInit = true; } } - protected: - std::shared_ptr parameterCard; - bool pCardInit = false; - void headWriter() override{ + void slhaNode::headWriter(){ nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; } nodeHeader += ">"; } - void endWriter() override{ nodeEnd += "\n"; } - void contWriter() override{ + void slhaNode::endWriter(){ nodeEnd += "\n"; } + void slhaNode::contWriter(){ if( pCardInit ){ nodeContent = *parameterCard->selfWrite(); } else { nodeContent = content; } } - }; // ZW: struct for handling LHE init nodes - struct initNode : public xmlNode { - public: - std::shared_ptr getHead(){ return initHead; } - std::vector> getLines(){ return initLines; } - void setHead( std::shared_ptr head ){ modded = true; initHead = head; } - void setLines( std::vector> lines ){ modded = true; initLines = lines; initHead->nprup = std::to_string( initLines.size() ); } - void addLine( std::shared_ptr line ){ modded = true; initLines.push_back( line ); initHead->nprup = std::to_string( initLines.size() ); } - initNode() : xmlNode(){ name = "init"; } - initNode( const std::string_view originFile, const size_t& begin = 0, bool parseOnline = false ) + std::shared_ptr initNode::getHead(){ return initHead; } + std::vector> initNode::getLines(){ return initLines; } + void initNode::setHead( std::shared_ptr head ){ modded = true; initHead = head; } + void initNode::setLines( std::vector> lines ){ modded = true; initLines = lines; initHead->nprup = std::to_string( initLines.size() ); } + void initNode::addLine( std::shared_ptr line ){ modded = true; initLines.push_back( line ); initHead->nprup = std::to_string( initLines.size() ); } + initNode::initNode() : xmlNode(){ name = "init"; } + initNode::initNode( const std::string_view originFile, const size_t& begin, bool parseOnline ) : xmlNode( originFile, begin ){ content = originFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); } - initNode( xmlNode& node, bool parseOnline = 
false ) : xmlNode( node ){ + initNode::initNode( xmlNode& node, bool parseOnline ) : xmlNode( node ){ content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } - initNode( xmlNode* node, bool parseOnline = false ) : xmlNode( *node ){ + initNode::initNode( xmlNode* node, bool parseOnline ) : xmlNode( *node ){ content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } - initNode( std::shared_ptr node, bool parseOnline = false ) : xmlNode( *node ){ + initNode::initNode( std::shared_ptr node, bool parseOnline ) : xmlNode( *node ){ content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } - initNode( xmlTree tree, bool parseOnline = false ) : xmlNode( tree ){ + initNode::initNode( xmlTree tree, bool parseOnline ) : xmlNode( tree ){ content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } - initNode( std::shared_ptr tree, bool parseOnline = false ) : xmlNode( *tree ){ + initNode::initNode( std::shared_ptr tree, bool parseOnline ) : xmlNode( *tree ){ content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } - initNode( xmlTree* tree, bool parseOnline = false ) : xmlNode( *tree ){ + initNode::initNode( xmlTree* tree, bool parseOnline ) : xmlNode( *tree ){ content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } - protected: - std::shared_ptr initHead; - std::vector> initLines; - bool parseContent() override{ + bool initNode::parseContent(){ if( content.size() == 0 ){ return false; } auto linebreaks = lineFinder( content ); if( 
linebreaks->size() == 0 ){ return false; } @@ -2115,19 +1941,16 @@ namespace REX } return true; } - void contWriter() override{ + void initNode::contWriter(){ if( isModded() ){nodeContent = std::string( content ); return; } nodeContent = *initHead->getContent(); for( auto line : initLines ){ nodeContent += *line->getContent(); } } - }; // ZW: struct for explicitly handling LHE header nodes - struct lheHead : public xmlNode { - public: - size_t addWgtGroup( std::shared_ptr& wgtGroup ){ + size_t lheHead::addWgtGroup( std::shared_ptr& wgtGroup ){ hasRwgt = true; modded = true; if( wgtGrpInit( wgtGroup ) ){ @@ -2135,7 +1958,7 @@ namespace REX } return (rwgtNodes->noGrps() - 1); } - size_t addWgtGroup( weightGroup wgtGroup ){ + size_t lheHead::addWgtGroup( weightGroup wgtGroup ){ hasRwgt = true; modded = true; auto wgtGrpPtr = std::make_shared( wgtGroup ); @@ -2144,21 +1967,21 @@ namespace REX } return (rwgtNodes->noGrps() - 1); } - void addWgt( size_t index, std::shared_ptr nuWgt ){ + void lheHead::addWgt( size_t index, std::shared_ptr nuWgt ){ if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." ); hasRwgt = true; modded = true; rwgtNodes->addWgt( index, nuWgt ); } - void addWgt( size_t index, headWeight nuWgt ){ + void lheHead::addWgt( size_t index, headWeight nuWgt ){ if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." ); hasRwgt = true; modded = true; rwgtNodes->addWgt( index, nuWgt ); } - void addWgt( size_t index, std::shared_ptr nuWgt, std::string idTagg ){ + void lheHead::addWgt( size_t index, std::shared_ptr nuWgt, std::string idTagg ){ if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." 
); hasRwgt = true; @@ -2166,7 +1989,7 @@ namespace REX nuWgt->setId( idTagg ); rwgtNodes->addWgt( index, nuWgt ); } - void addWgt( size_t index, headWeight nuWgt, std::string idTagg ){ + void lheHead::addWgt( size_t index, headWeight nuWgt, std::string idTagg ){ if( index >= (size_t)rwgtNodes->getGroups().size() ) throw std::range_error( "Appending weight to uninitialised weightgroup." ); hasRwgt = true; @@ -2174,15 +1997,15 @@ namespace REX nuWgt.setId( idTagg ); rwgtNodes->addWgt( index, nuWgt ); } - void setInitRwgt( initRwgt initWgt ){ hasRwgt = true; modded = true; rwgtNodes = std::make_shared(initWgt); } - void setInitRwgt( std::shared_ptr initWgt ){ hasRwgt = true; modded = true; rwgtNodes = initWgt; } - std::vector> getWgtGroups(){ return rwgtNodes->getGroups(); } - std::shared_ptr getInitRwgt(){ return rwgtNodes; } - std::shared_ptr getParameters(){ return parameters; } - void setParameters( std::shared_ptr params ){ parameters = params; } - bool rwgtInc(){ return hasRwgt; } - lheHead(){ return; } - lheHead( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + void lheHead::setInitRwgt( initRwgt initWgt ){ hasRwgt = true; modded = true; rwgtNodes = std::make_shared(initWgt); } + void lheHead::setInitRwgt( std::shared_ptr initWgt ){ hasRwgt = true; modded = true; rwgtNodes = initWgt; } + std::vector> lheHead::getWgtGroups(){ return rwgtNodes->getGroups(); } + std::shared_ptr lheHead::getInitRwgt(){ return rwgtNodes; } + std::shared_ptr lheHead::getParameters(){ return parameters; } + void lheHead::setParameters( std::shared_ptr params ){ parameters = params; } + bool lheHead::rwgtInc(){ return hasRwgt; } + lheHead::lheHead(){ return; } + lheHead::lheHead( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) : xmlNode(originFile, begin, childs){ xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); if( trueStart != npos 
){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} @@ -2191,45 +2014,43 @@ namespace REX if (child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - lheHead( xmlNode& node ) : xmlNode(node){ + lheHead::lheHead( xmlNode& node ) : xmlNode(node){ for( auto child : node.getChildren() ){ if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - lheHead( xmlNode* node ) : xmlNode(*node){ + lheHead::lheHead( xmlNode* node ) : xmlNode(*node){ for( auto child : node->getChildren() ){ if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - lheHead( std::shared_ptr node ) : xmlNode( *node ){ + lheHead::lheHead( std::shared_ptr node ) : xmlNode( *node ){ for( auto child : node->getChildren() ){ if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - lheHead( xmlTree tree ) : xmlNode( tree ){ + lheHead::lheHead( xmlTree tree ) : xmlNode( tree ){ for( auto child : children ){ if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - lheHead( std::shared_ptr tree ) : xmlNode( *tree ){ + lheHead::lheHead( std::shared_ptr tree ) : xmlNode( *tree ){ for( auto child : children ){ if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - lheHead( xmlTree* tree ) : xmlNode( *tree ){ + lheHead::lheHead( xmlTree* tree ) : xmlNode( *tree ){ for( auto child : children ){ if ( 
child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } } } - protected: - bool wgtGrpIsInit = false; - bool wgtGrpInit( std::shared_ptr& wgtGrp ){ + bool lheHead::wgtGrpInit( std::shared_ptr& wgtGrp ){ if( wgtGrpIsInit ){ return true; } if( rwgtNodes == nullptr ){ rwgtNodes = std::make_shared(); @@ -2238,13 +2059,7 @@ namespace REX return false; } else throw std::runtime_error( "Error while initiating return LHE file header (initrwgt node is defined in an unrecognised manner)." ); } - std::shared_ptr parameters; - bool hasRwgt = false; - std::shared_ptr rwgtNodes; - std::vector> initrwgt; - bool relChildSet = false; - std::vector relChild; - void setRelChild(){ + void lheHead::setRelChild(){ if( relChildSet ){ return; } relChild.reserve( children.size() ); for( size_t k = 0 ; k < children.size() ; ++k ){ @@ -2255,7 +2070,7 @@ namespace REX } relChildSet = true; } - bool parseChildren( bool recursive ){ + bool lheHead::parseChildren( bool recursive ){ bool status = true; for( auto child : children ){ if( child->getName() == "slha" || child->getName() == "initrwgt" ){ continue; } @@ -2265,14 +2080,14 @@ namespace REX } return status; } - void headWriter() override{ + void lheHead::headWriter(){ nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; } nodeHeader += ">\n"; } - void childWriter() override{ + void lheHead::childWriter(){ setRelChild(); for( auto relKid : relChild ){ nodeContent += *(children[relKid]->nodeWriter()); @@ -2282,7 +2097,7 @@ namespace REX nodeContent += *rwgtNodes->nodeWriter(); } } - void fullWriter() override{ + void lheHead::fullWriter(){ if( isModded() ){ headWriter(); contWriter(); @@ -2292,20 +2107,14 @@ namespace REX written = true; } } - }; // ZW: struct for keeping track of appended weights in LHE node, // since weight information is stored both in the header // and in the individual events - struct 
newWgt{ - protected: - std::shared_ptr headWgt; - std::vector> bodyWgts; - public: - newWgt( std::shared_ptr heaWgt, std::vector> bodWgts ){ + newWgt::newWgt( std::shared_ptr heaWgt, std::vector> bodWgts ){ headWgt = heaWgt; bodyWgts = bodWgts; } - newWgt( std::shared_ptr heaWgt, std::shared_ptr> wgts ){ + newWgt::newWgt( std::shared_ptr heaWgt, std::shared_ptr> wgts ){ headWgt = heaWgt; bodyWgts = std::vector>(wgts->size()); auto idTag = std::string(headWgt->getTag()); @@ -2319,14 +2128,14 @@ namespace REX } } } - newWgt( std::string_view parameters, std::shared_ptr> wgts, std::string idTag = "rex_rwgt" ){ + newWgt::newWgt( std::string_view parameters, std::shared_ptr> wgts, std::string idTag ){ headWgt = std::make_shared(parameters, idTag); bodyWgts = std::vector>(wgts->size()); for( size_t i = 0 ; i < wgts->size() ; ++i ){ bodyWgts[i] = std::make_shared(wgts->at(i), idTag); } } - newWgt( std::string_view parameters, int idNum, std::shared_ptr> wgts, std::string idTag = "rex_rwgt" ){ + newWgt::newWgt( std::string_view parameters, int idNum, std::shared_ptr> wgts, std::string idTag ){ std::string newTag = std::string( idTag ) + "_" + std::to_string( idNum ); headWgt = std::make_shared(parameters, newTag); bodyWgts = std::vector>(wgts->size()); @@ -2334,15 +2143,15 @@ namespace REX bodyWgts[i] = std::make_shared(wgts->at(i), newTag); } } - newWgt( std::string& parameters ){ + newWgt::newWgt( std::string& parameters ){ headWgt = std::make_shared(parameters); } - newWgt( std::string& parameters, std::string& idTag ){ + newWgt::newWgt( std::string& parameters, std::string& idTag ){ headWgt = std::make_shared(parameters, idTag); } - std::shared_ptr getHeadWgt(){ return headWgt; } - std::vector> getBodyWgts(){ return bodyWgts; } - void addBdyWgts( std::shared_ptr> wgts ){ + std::shared_ptr newWgt::getHeadWgt(){ return headWgt; } + std::vector> newWgt::getBodyWgts(){ return bodyWgts; } + void newWgt::addBdyWgts( std::shared_ptr> wgts ){ auto idTag = 
std::string(headWgt->getTag()); if( idTag != "" ){ for( size_t i = 0 ; i < wgts->size() ; ++i ){ @@ -2354,13 +2163,10 @@ namespace REX } } } - }; // ZW: general struct for handling LHE files explicitly - struct lheNode : public xmlNode { - public: - lheNode() : xmlNode(){} - lheNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ) + lheNode::lheNode() : xmlNode(){} + lheNode::lheNode( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) : xmlNode(originFile, begin, childs){ //xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); //if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} @@ -2370,76 +2176,69 @@ namespace REX if( child->getName() == "event" ){ events.push_back( std::make_shared( *child ) ); continue; } } } - auto getHeader(){ return header; } - auto getInit(){ return init; } - auto& getEvents(){ return events; } - bool isModded() override{ return modded; } - bool isModded( bool deep ) override{ + std::shared_ptr lheNode::getHeader(){ return header; } + std::shared_ptr lheNode::getInit(){ return init; } + std::vector> lheNode::getEvents(){ return events; } + bool lheNode::isModded(){ return modded; } + bool lheNode::isModded( bool deep ){ if( !deep ){ return isModded(); } bool modStat = isModded(); for( auto child : children ){ modStat = ( modStat || child->isModded( deep ) ); } for( auto event : events ){ modStat = ( modStat || event->isModded( deep ) ); } return modStat; } - void setInit( std::shared_ptr initNod ){ init = initNod; } - void setHeader( std::shared_ptr headNod ){ header = headNod; } - void addWgt( size_t index, newWgt& addedWgt ){ + void lheNode::setInit( std::shared_ptr initNod ){ init = initNod; } + void lheNode::setHeader( std::shared_ptr headNod ){ header = headNod; } + void lheNode::addWgt( size_t index, newWgt& addedWgt ){ 
header->addWgt( index, addedWgt.getHeadWgt() ); auto wgtsVec = addedWgt.getBodyWgts(); for( size_t k = 0 ; k < wgtsVec.size() ; ++k ){ events[k]->addWgt( wgtsVec[k] ); } } - void addWgt( size_t index, newWgt& addedWgt, std::string& idTag ){ + void lheNode::addWgt( size_t index, newWgt& addedWgt, std::string& idTag ){ header->addWgt( index, addedWgt.getHeadWgt(), idTag ); auto wgtsVec = addedWgt.getBodyWgts(); for( size_t k = 0 ; k < wgtsVec.size() ; ++k ){ events[k]->addWgt( wgtsVec[k] ); } } - void setRelStats( std::vector& particles ){ + void lheNode::setRelStats( std::vector& particles ){ relStat = particles; } - std::vector& getRelStats(){ + std::vector& lheNode::getRelStats(){ return relStat; } - void setSameSort( sortFcn& sortF ){ + void lheNode::setSameSort( sortFcn& sortF ){ particleSort = sortF; } - sortFcn& getSameSort(){ + sortFcn& lheNode::getSameSort(){ return particleSort; } - void setStatSort( statSort& statS ){ + void lheNode::setStatSort( statSort& statS ){ statParticleSort = statS; } - statSort& getStatSort(){ + statSort& lheNode::getStatSort(){ return statParticleSort; } - protected: - std::vector> events = {}; - std::shared_ptr header = std::make_shared(xmlFile, start); - std::shared_ptr init = std::make_shared(xmlFile, start); - std::vector relStat = {"-1", "1"}; - sortFcn particleSort = []( std::vector prts ){ return stodSort(prts); }; - statSort statParticleSort = []( std::string_view dummy, std::vector prts ){ return stodSort(prts); }; - virtual void headerWriter(){ + void lheNode::headerWriter(){ nodeContent += "\n" + *header->nodeWriter(); } - virtual void initWriter(){ + void lheNode::initWriter(){ nodeContent += *init->nodeWriter(); } - virtual void eventWriter(){ + void lheNode::eventWriter(){ for( auto event : events ){ nodeContent += *event->nodeWriter(); } } - void contWriter() override{ + void lheNode::contWriter(){ nodeContent = ""; headerWriter(); initWriter(); eventWriter(); } - void fullWriter() override{ + void 
lheNode::fullWriter(){ if( isModded( true ) ){ headWriter(); contWriter(); @@ -2452,12 +2251,10 @@ namespace REX written = true; } } - public: - virtual std::shared_ptr nodeWriter() { + std::shared_ptr lheNode::nodeWriter() { if( isModded( true ) || !isWritten() ){ fullWriter(); } return writtenSelf; } - }; // ZW: function for extracting event information from // LHE files @@ -2783,20 +2580,18 @@ namespace REX return true; } - struct eventComp{ - bool operator()( event& firstEv, event& secEv){ + bool eventComp::operator()( event& firstEv, event& secEv){ if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getStatSort());} else {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getSortFcn() );} } - bool operator()( const event& firstEv, const event& secEv) const { + bool eventComp::operator()( const event& firstEv, const event& secEv) const { if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getStatSort());} else {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getSortFcn() );} } - bool operator()(event& firstEv, event& secEv, std::vector statVec){ + bool eventComp::operator()(event& firstEv, event& secEv, std::vector statVec){ if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, statVec, firstEv.getStatSort());} else {return evProcComp( firstEv, secEv, statVec, firstEv.getSortFcn() );} } - }; // ZW: fcn for checking whether a list of pdgXtract format // processes sourceProcList contains a given process newProc @@ -3158,16 +2953,7 @@ namespace REX } // ZW: transposed event information struct - struct evtInfo { - public: - std::vector wgts; - std::vector scales; - std::vector aQEDs; - std::vector aQCDs; - std::vector nprts; - std::vector relNPrts; - std::vector procIDs; - evtInfo( const std::vector>& lheFile = {} ){ + evtInfo::evtInfo( const std::vector>& lheFile ){ int nEvt = lheFile.size(); wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); 
nprts.reserve(nEvt); procIDs.reserve(nEvt); for( auto evt : lheFile ) @@ -3180,7 +2966,7 @@ namespace REX procIDs.push_back(evt->getHead().getProcID()); } } - evtInfo( const std::vector>& lheFile, const std::vector& statVec ){ + evtInfo::evtInfo( const std::vector>& lheFile, const std::vector& statVec ){ int nEvt = lheFile.size(); wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); for( auto evt : lheFile ) @@ -3195,7 +2981,7 @@ namespace REX procIDs.push_back(evt->getHead().getProcID()); } } - evtInfo( const std::vector>& lheFile, const std::vector& statVec, + evtInfo::evtInfo( const std::vector>& lheFile, const std::vector& statVec, sortFcn sorter ){ int nEvt = lheFile.size(); wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); @@ -3211,7 +2997,7 @@ namespace REX procIDs.push_back(evt->getHead().getProcID()); } } - evtInfo( const std::vector>& lheFile, const std::vector& statVec, + evtInfo::evtInfo( const std::vector>& lheFile, const std::vector& statVec, statSort sorter ){ int nEvt = lheFile.size(); wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); @@ -3227,20 +3013,9 @@ namespace REX procIDs.push_back(evt->getHead().getProcID()); } } - }; // ZW: transposed particle information struct - struct prtInfo { - public: - std::vector moms; - std::vector masses; - std::vector vtims; - std::vector spins; - std::vector statuses; - std::vector mothers; - std::vector icols; - std::vector pdgs; - prtInfo( const std::vector>& lheFile = {}, const int nPrt = 8 ){ + prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt ){ int nEvt = lheFile.size(); moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); 
icols.reserve(2*nPrt*nEvt); @@ -3264,7 +3039,7 @@ namespace REX } } } - prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ + prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ int nEvt = lheFile.size(); moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); @@ -3292,7 +3067,7 @@ namespace REX } } } - prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, sortFcn sorter ){ int nEvt = lheFile.size(); moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); @@ -3321,7 +3096,7 @@ namespace REX } } } - prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, statSort sorter ){ int nEvt = lheFile.size(); moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); @@ -3350,49 +3125,36 @@ namespace REX } } } - }; // ZW: transposed LHE file with a single process type - struct transMonoLHE { - public: - evtInfo evtsHead; - prtInfo evtsData; - std::shared_ptr process; - transMonoLHE( const std::vector>& lheFile = {}, const int nPrt = 8 ){ + transMonoLHE::transMonoLHE( const std::vector>& lheFile , const int nPrt ){ evtsHead = evtInfo(lheFile); evtsData = prtInfo(lheFile, nPrt); process = lheFile[0]; } - transMonoLHE( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ + transMonoLHE::transMonoLHE( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ evtsHead = evtInfo(lheFile, statVec); evtsData = prtInfo(lheFile, nPrt, statVec); process = lheFile[0]; } - transMonoLHE( 
const std::vector>& lheFile, const int nPrt, + transMonoLHE::transMonoLHE( const std::vector>& lheFile, const int nPrt, sortFcn sorter, - std::vector statVec = { "-1", "1" } ){ + std::vector statVec ){ evtsHead = evtInfo(lheFile, statVec); evtsData = prtInfo(lheFile, nPrt, statVec, sorter); process = lheFile[0]; } - transMonoLHE( const std::vector>& lheFile, const int nPrt, + transMonoLHE::transMonoLHE( const std::vector>& lheFile, const int nPrt, statSort sorter, - std::vector statVec = { "-1", "1" } ){ + std::vector statVec){ evtsHead = evtInfo(lheFile, statVec); evtsData = prtInfo(lheFile, nPrt, statVec, sorter); process = lheFile[0]; } - }; // ZW: transposed LHE file ordered by subprocess - struct transLHE { - public: - std::string_view xmlFile; - std::vector> subProcs; - std::vector> procSets; - std::vector>> relProcs; - transLHE(){ return; } - transLHE( lheNode& lheFile ) + transLHE::transLHE(){ return; } + transLHE::transLHE( lheNode& lheFile ) { procSets = evProcessPull( lheFile ); relProcs = evProcOrder( lheFile, procSets ); @@ -3404,9 +3166,9 @@ namespace REX subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt() ); } } - transLHE( lheNode& lheFile, + transLHE::transLHE( lheNode& lheFile, sortFcn sorter, - const std::vector& statVec = { "-1", "1" } ) + const std::vector& statVec ) { procSets = evProcessPull( lheFile, sorter, statVec ); relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); @@ -3418,9 +3180,9 @@ namespace REX subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), sorter, statVec ); } } - transLHE( lheNode& lheFile, + transLHE::transLHE( lheNode& lheFile, statSort sorter, - const std::vector& statVec = { "-1", "1" } ) + const std::vector& statVec) { procSets = evProcessPull( lheFile, sorter, statVec ); relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); @@ -3432,7 +3194,7 @@ namespace REX subProcs[k] = std::make_shared( *procsOrdered[k], 
procsOrdered[k]->at(0)->getNprt(), sorter, statVec ); } } - transLHE( lheNode& lheFile, const std::vector& statVec ) + transLHE::transLHE( lheNode& lheFile, const std::vector& statVec ) { procSets = evProcessPull( lheFile, statVec ); relProcs = evProcOrder( lheFile, procSets, statVec ); @@ -3444,15 +3206,15 @@ namespace REX subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), statVec ); } } - template - std::shared_ptr> vectorFlat( std::vector>> vecVec ) +// template + std::shared_ptr> transLHE::vectorFlat( std::vector>> vecVec ) { if( vecVec.size() != relProcs.size() ) throw std::range_error("vectorFlat: input vector size does not match number of subprocesses"); for( size_t k = 0 ; k < vecVec.size() ; ++k){ if( vecVec[k]->size() == relProcs[k]->size() ) continue; else throw std::range_error("vectorFlat: input vector size does not match number of events for subprocess"); } - auto flatVec = std::make_shared>(relProcs[0]->size()); + auto flatVec = std::make_shared>(relProcs[0]->size()); for( size_t k = 0 ; k < relProcs.size() ; ++k ){ size_t currInd = 0; for( size_t j = 0 ; j < relProcs[k]->size() ; ++j ){ @@ -3464,7 +3226,6 @@ namespace REX } return flatVec; } - }; // ZW: vector transformation string_to_double std::shared_ptr> vecStoD( const std::vector dataVec ) @@ -3500,53 +3261,21 @@ namespace REX // ZW: bool struct to define which double values // to extract transposed from LHE file - struct lheRetDs{ - public: - bool ebmup = false; - bool xsecup = false; - bool xerrup = false; - bool xmaxup = false; - bool xwgtup = false; - bool scalup = false; - bool aqedup = false; - bool aqcdup = false; - bool pup = true; - bool mass = false; - bool vtimup = false; - bool spinup = false; - std::vector getBools(){ + std::vector lheRetDs::getBools(){ return { ebmup, xsecup, xerrup, xmaxup, xwgtup, scalup, aqedup, aqcdup, pup, mass, vtimup, spinup }; } - }; // ZW: bool struct to define which int values // to extract transposed from LHE file - 
struct lheRetInts{ - public: - //bool maxpup = false; - bool idbmup = false; - bool pdfgup = false; - bool pdfsup = false; - bool idwtup = false; - bool nprup = false; - bool lprup = false; - //bool maxnup = false; - bool nup = true; - bool idprup = false; - bool idup = true; - bool istup = true; - bool mothup = false; - bool icolup = false; - std::vector getBools(){ + std::vector lheRetInts::getBools(){ return { idbmup, pdfgup, pdfsup, idwtup, nprup, lprup, nup, idprup, idup, istup, mothup, icolup }; } - }; // ZW: function for extracting transposed double values // from LHE file - std::shared_ptr>>> lheValDoubles( lheNode& lheFile, lheRetDs vals = lheRetDs() ) + std::shared_ptr>>> lheValDoubles( lheNode& lheFile, lheRetDs vals ) { // ZW: hard-setting returning g_S instead of a_S for now bool aStogS = true; @@ -3605,7 +3334,7 @@ namespace REX return lheDos; } - std::shared_ptr>>> lheValDoubles(transLHE& lheAOS, lheRetDs vals = lheRetDs() ) + std::shared_ptr>>> lheValDoubles(transLHE& lheAOS, lheRetDs vals ) { // ZW: hard-setting returning g_S instead of a_S for now bool aStogS = true; @@ -3960,4 +3689,4 @@ namespace REX } } -#endif \ No newline at end of file +#endif diff --git a/tools/REX/rwgt_driver.cc b/tools/REX/rwgt_driver.cc index 4fe4023730..7fa2ab2b5f 100644 --- a/tools/REX/rwgt_driver.cc +++ b/tools/REX/rwgt_driver.cc @@ -30,6 +30,7 @@ int usage( char* argv0, int ret = 1 ) int main( int argc, char** argv ){ + std::cout << "Starting reweighting driver...\n"; std::string lheFilePath; std::string rwgtCardPath; std::string outputPath; @@ -38,9 +39,8 @@ int main( int argc, char** argv ){ if (argc < 2){ return usage( argv[0] ); } - // READ COMMAND LINE ARGUMENTS - for( int i = 1; i <= argc; i++ ) + for( int i = 1; i < argc; i++ ) { auto currArg = std::string( argv[i] ); if( currArg.substr(0,9) == "--lhefile" || currArg.substr(0,4) == "-lhe" ) @@ -55,11 +55,12 @@ int main( int argc, char** argv ){ } else if (currArg.substr(0,12) == "--param_card" || 
currArg.substr(0,5) == "-slha" ){ slhaPath = currArg.substr( currArg.find( "=" ) + 1 ); } - { + else { return usage( argv[0] ); } } + if( lheFilePath.empty() || rwgtCardPath.empty() ){ return usage( argv[0] ); } @@ -76,12 +77,16 @@ int main( int argc, char** argv ){ if( onWindows ){ if( currPath.substr( currPath.find_last_of("\\", slashPos - 1) + 1, 2 ) == "P1" ){ slhaPath = "..\\..\\Cards\\param_card.dat"; + } else if( currPath.substr( currPath.find_last_of("\\", slashPos - 1) + 1, 3 ) == "Sub" ){ + slhaPath = "..\\Cards\\param_card.dat"; } else{ slhaPath = "\\Cards\\param_card.dat"; } } else { if( currPath.substr( currPath.find_last_of("/", slashPos - 1) + 1, 2 ) == "P1" ){ slhaPath = "../../Cards/param_card.dat"; + } else if( currPath.substr( currPath.find_last_of("/", slashPos - 1) + 1, 3 ) == "Sub" ) { + slhaPath = "../Cards/param_card.dat"; } else { slhaPath = "/Cards/param_card.dat"; } @@ -98,7 +103,7 @@ int main( int argc, char** argv ){ REX::teaw::ampCall subProcSet; for( auto proc : runSet ){ - subProcSet.insert( REX::teaw::ampPair( proc.procEvent, proc.bridgeCall ) ); + subProcSet.insert( REX::teaw::ampPair( proc.procEventInt, proc.bridgeCall ) ); } //auto bridgeCont = fbridgeRunner( fileCol.getLhe() ); diff --git a/tools/REX/rwgt_instance.cc b/tools/REX/rwgt_instance.cc new file mode 100644 index 0000000000..a927754625 --- /dev/null +++ b/tools/REX/rwgt_instance.cc @@ -0,0 +1,78 @@ +//========================================================================== +// Copyright (C) 2023-2024 CERN +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Written by: Z. Wettersten (Jan 2024) for the MG5aMC CUDACPP plugin. +//========================================================================== +//========================================================================== +// Library including generic functions and classes for event reweighting. 
+// Process-specific rwgt_runner files are generated by mg5amc@nlo and use +// this library, while the rwgt_driver file is a wrapping program that +// calls the process-specific runners for given subprocesses. +//========================================================================== + +#ifndef _RWGT_INSTANCE_CC_ +#define _RWGT_INSTANCE_CC_ + +#include "rwgt_instance.h" + +namespace rwgt{ + + + //ZW: Function for calculating the number of remaining events in a warp + // in order to pad the input arrays to a multiple of the warp size + unsigned int warpRemain( unsigned int nEvt, unsigned int nWarp ){ + return (nWarp - ( nEvt % nWarp )) % nWarp; + } + + //ZW: Function for padding the input arrays to a multiple of the warp size + template + std::shared_ptr> warpPad( std::vector& input, unsigned int nWarp = 32 ){ + auto nEvt = input->size(); + auto nWarpRemain = warpRemain( nEvt, nWarp ); + auto fauxNEvt = nEvt + nWarpRemain; + auto output = std::make_shared>( fauxNEvt ); + std::copy( input.begin(), input.end(), output->begin()); + return output; + } + + instance::instance(){} + instance::instance( std::vector>& event){ + this->procEventInt = event; + this->process = REX::event( event ); + } + instance::instance( std::vector>& event, REX::teaw::amplitude& amp ){ + this->procEventInt = event; + this->process = REX::event( event ); + bridgeCall = amp; + } + void instance::setProc( std::vector>& event ){ + this->procEventInt = event; + this->process = REX::event( event ); + } + instance::instance( std::vector>& event){ + this->procEventStr = event; + this->process = REX::event( event ); + } + instance::instance( std::vector>& event, REX::teaw::amplitude& amp ){ + this->procEventStr = event; + this->process = REX::event( event ); + bridgeCall = amp; + } + void instance::setProc( std::vector>& event ){ + this->procEventStr = event; + this->process = REX::event( event ); + } + void instance::setAmp( REX::teaw::amplitude& amp ){ + bridgeCall = amp; + } + std::shared_ptr> 
instance::ampEval( std::vector& momenta, std::vector& alphaS ){ + return bridgeCall( momenta, alphaS ); + } + std::shared_ptr> instance::ampEval( std::shared_ptr> momenta, + std::shared_ptr> alphaS ){ + return bridgeCall( *momenta, *alphaS ); + } + +} + +#endif diff --git a/tools/REX/rwgt_instance.h b/tools/REX/rwgt_instance.h index e87219b001..376635933e 100644 --- a/tools/REX/rwgt_instance.h +++ b/tools/REX/rwgt_instance.h @@ -13,7 +13,7 @@ #ifndef _RWGT_INSTANCE_H_ #define _RWGT_INSTANCE_H_ -#include "teawREX.hpp" +#include "teawREX.h" namespace rwgt{ @@ -21,49 +21,24 @@ namespace rwgt{ //ZW: Function for calculating the number of remaining events in a warp // in order to pad the input arrays to a multiple of the warp size - unsigned int warpRemain( unsigned int nEvt, unsigned int nWarp = 32 ){ - return (nWarp - ( nEvt % nWarp )) % nWarp; - } - - //ZW: Function for padding the input arrays to a multiple of the warp size - template - std::shared_ptr> warpPad( std::vector& input, unsigned int nWarp = 32 ){ - auto nEvt = input->size(); - auto nWarpRemain = warpRemain( nEvt, nWarp ); - auto fauxNEvt = nEvt + nWarpRemain; - auto output = std::make_shared>( fauxNEvt ); - std::copy( input.begin(), input.end(), output->begin()); - return output; - } + unsigned int warpRemain( unsigned int nEvt, unsigned int nWarp = 32 ); struct instance{ - std::vector> procEvent; + std::vector> procEventInt; + std::vector> procEventStr; REX::event process; REX::teaw::amplitude bridgeCall; - instance(){} - instance( std::vector>& event){ - this->procEvent = event; - this->process = REX::event( event ); - } - instance( std::vector>& event, REX::teaw::amplitude& amp ){ - this->procEvent = event; - this->process = REX::event( event ); - bridgeCall = amp; - } - void setProc( std::vector>& event ){ - this->procEvent = event; - this->process = REX::event( event ); - } - void setAmp( REX::teaw::amplitude& amp ){ - bridgeCall = amp; - } - std::shared_ptr> ampEval( std::vector& momenta, 
std::vector& alphaS ){ - return bridgeCall( momenta, alphaS ); - } + instance(); + instance( std::vector>& event); + instance( std::vector>& event, REX::teaw::amplitude& amp ); + void setProc( std::vector>& event ); + instance( std::vector>& event); + instance( std::vector>& event, REX::teaw::amplitude& amp ); + void setProc( std::vector>& event ); + void setAmp( REX::teaw::amplitude& amp ); + std::shared_ptr> ampEval( std::vector& momenta, std::vector& alphaS ); std::shared_ptr> ampEval( std::shared_ptr> momenta, - std::shared_ptr> alphaS ){ - return bridgeCall( *momenta, *alphaS ); - } + std::shared_ptr> alphaS ); }; } diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index 14d2dfdc79..51be5e7ec7 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -12,7 +12,6 @@ %(process_lines)s //-------------------------------------------------------------------------- -#include "teawREX.hpp" #include "rwgt_instance.h" #include "fbridge.cc" @@ -65,7 +64,7 @@ namespace %(process_namespace)s{ } std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ runnerSetup( alphaS ); - for( size_t k = 0 ; k < nWarpRemain ; ++k ){ + for( size_t j = 0 ; j < nWarpRemain ; ++j ){ alphaS.push_back( 0. ); for( size_t k = 0 ; k < nMom * nPar ; ++k ){ momenta.push_back( 0. 
); @@ -113,7 +112,7 @@ namespace %(process_namespace)s{ // auto procEvent = REX::event( procEvent ); // REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; - std::vector> eventVec = {%(process_event)s}; + std::vector> eventVec = {%(process_event)s}; REX::event locEv = REX::event( eventVec ); fbridgeRunner fBridge = fbridgeRunner( locEv ); @@ -124,7 +123,6 @@ namespace %(process_namespace)s{ REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; auto runner = rwgt::instance(eventVec, scatteringAmp); - //auto thisProc = runner.getProc( scatteringAmp ); auto thisProc = runner.process.getProc( currProcSort ); // ZW: SET UP WRAPPER FOR FORTRAN_BRIDGE diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc new file mode 120000 index 0000000000..f9640c2fcb --- /dev/null +++ b/tools/REX/teawREX.cc @@ -0,0 +1 @@ +teawREX.hpp \ No newline at end of file diff --git a/tools/REX/teawREX.h b/tools/REX/teawREX.h new file mode 100644 index 0000000000..a865db4944 --- /dev/null +++ b/tools/REX/teawREX.h @@ -0,0 +1,188 @@ +/*** + * _ ______ _______ __ + * | | | ___ \ ___\ \ / / + * | |_ ___ __ ___ _| |_/ / |__ \ V / + * | __/ _ \/ _` \ \ /\ / / /| __| / \ + * | || __/ (_| |\ V V /| |\ \| |___/ /^\ \ + * \__\___|\__,_| \_/\_/ \_| \_\____/\/ \/ + * + ***/ + +// THIS IS NOT A LICENSED RELEASE +// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD +// FROM AN IMPROPER RELEASE. + +// Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. +// All rights reserved. 
+ +#ifndef _TEAWREX_H_ +#define _TEAWREX_H_ + +#include +#include +#include +#include +#include +#include +#include +#include "REX.h" + +namespace REX::teaw +{ + + using amplitude = std::function>(std::vector&, std::vector&)>; + using ampCall = std::map; + using ampPair = std::pair; + using vecMap = std::map>, REX::eventComp>; + + struct rwgtVal : REX::paramVal{ + public: + std::string_view blockName; + bool allStat; + bool isAll(); + rwgtVal(); + rwgtVal( std::string_view paramLine ); + std::string_view getLine(); + void outWrite( REX::paramBlock& srcBlock ); + }; + + struct rwgtBlock { + public: + std::string_view name; + std::vector rwgtVals; + rwgtBlock( std::vector values = {}, std::string_view title = "" ); + rwgtBlock( const std::vector& vals, std::string_view title = "" ); + std::string_view getBlock(); + void outWrite( REX::paramBlock& srcBlock, const std::map& blocks ); + protected: + std::string runBlock; + bool written = false; + }; + + struct rwgtProc { + public: + std::vector rwgtParams; + std::string_view procString; + std::string_view rwgtName; + std::vector rwgtOpts; + void parse(); + rwgtProc( REX::lesHouchesCard slhaSet, std::string_view rwgtSet = "", bool parseOnline = false ); + std::shared_ptr outWrite( const REX::lesHouchesCard& paramOrig ); + std::string_view comRunProc(); + }; + + struct rwgtCard{ + public: + REX::lesHouchesCard slhaCard; + std::vector rwgtRuns; + std::vector rwgtProcs; + std::vector opts; + std::vector rwgtNames; + std::string_view srcCard; + void parse( bool parseOnline = false ); + rwgtCard( std::string_view reweight_card ); + rwgtCard( std::string_view reweight_card, REX::lesHouchesCard slhaParams, bool parseOnline = false ); + std::vector> writeCards( REX::lesHouchesCard& slhaOrig ); + }; + + + struct rwgtCollection { + public: + void setRwgt( std::shared_ptr rwgts ); + void setRwgt( rwgtCard rwgts ); + void setSlha( std::shared_ptr slha ); + void setSlha( REX::lesHouchesCard slha ); + void setLhe( std::shared_ptr lhe 
); + void setLhe( REX::lheNode& lhe ); + void setLhe( std::string_view lhe_file ); + std::shared_ptr getRwgt(); + std::shared_ptr getSlha(); + std::shared_ptr getLhe(); + rwgtCollection(); + rwgtCollection( std::shared_ptr lhe, std::shared_ptr slha, std::shared_ptr rwgts ); + protected: + template + void setDoubles(Args&&... args); + std::shared_ptr rwgtSets; + std::shared_ptr slhaParameters; + std::shared_ptr lheFile; + std::vector>> wgts; + std::vector>> gS; + std::vector>> momenta; + bool lheFileSet = false; + bool slhaSet = false; + bool rwgtSet = false; + REX::transLHE eventFile; + }; + + struct rwgtFiles : rwgtCollection { + void setRwgtPath( std::string_view path ); + void setSlhaPath( std::string_view path ); + void setLhePath( std::string_view path ); + rwgtFiles(); + rwgtFiles( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ); + template + void initCards(Args&&... args); + template + void initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, Args&&... 
args ); + protected: + void pullRwgt(); + void pullSlha(); + void pullLhe(); + std::string rwgtPath; + std::string lhePath; + std::string slhaPath; + std::shared_ptr lheCard; + std::shared_ptr slhaCard; + std::shared_ptr rewgtCard; + }; + + struct rwgtRunner : rwgtFiles{ + public: + void setMeEval( amplitude eval ); + void setMeEvals( ampCall evals ); + void addMeEval( const REX::event& ev, const amplitude& eval ); + rwgtRunner(); + rwgtRunner( rwgtFiles& rwgts ); + rwgtRunner( rwgtFiles& rwgts, amplitude meCalc ); + rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ); + rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + amplitude meCalc ); + rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + ampCall meCalcs ); + bool oneME(); + bool singAmp(); + protected: + bool meInit = false; + bool meCompInit = false; + bool meSet = false; + bool normWgtSet = false; + amplitude meEval; + ampCall meEvals; + std::vector>> initMEs; + std::vector>> meNormWgts; + std::shared_ptr> normWgt; + std::shared_ptr rwgtGroup; + template + void setMEs(Args&&... args); + bool setParamCard( std::shared_ptr slhaParams ); + void setNormWgtsSingleME(); + void setNormWgtsMultiME(); + template + void setNormWgts(Args&&... 
args); + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId ); + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, std::string& id ); + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, REX::event& ev ); + bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, + std::string& id, REX::event& ev ); + bool lheFileWriter( std::shared_ptr lheFile, std::string outputDir = "rwgt_evts.lhe" ); + public: + void runRwgt( const std::string& output ); + }; + + + void rwgtRun( rwgtRunner& rwgt, const std::string& path ); + +} + +#endif \ No newline at end of file diff --git a/tools/REX/teawREX.hpp b/tools/REX/teawREX.hpp index 971b563f82..ae9efff776 100644 --- a/tools/REX/teawREX.hpp +++ b/tools/REX/teawREX.hpp @@ -15,8 +15,8 @@ // Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. // All rights reserved. -#ifndef _TEAWREX_HPP_ -#define _TEAWREX_HPP_ +#ifndef _TEAWREX_CC_ +#define _TEAWREX_CC_ #include #include @@ -25,16 +25,12 @@ #include #include #include -#include "REX.hpp" +#include "REX.cc" +#include "teawREX.h" namespace REX::teaw { - using amplitude = std::function>(std::vector&, std::vector&)>; - using ampCall = std::map; - using ampPair = std::pair; - using vecMap = std::map>, REX::eventComp>; - template std::shared_ptr> scatAmpEval(std::vector& momenta, std::function>(std::vector&)> evalFunc) { return evalFunc(momenta); } @@ -51,13 +47,8 @@ namespace REX::teaw std::shared_ptr> scatAmpEval(std::vector& momenta, std::function(std::vector&, std::vector&)> evalFunc) { return evalFunc(momenta); } - struct rwgtVal : REX::paramVal{ - public: - std::string_view blockName; - bool allStat; - bool isAll(){ return (idStr == "all"); } - rwgtVal() : paramVal(){ return; } - rwgtVal( std::string_view paramLine ) + rwgtVal::rwgtVal() : paramVal(){ return; } + rwgtVal::rwgtVal( std::string_view paramLine ) : paramVal( paramLine, 
false ){if( paramLine.size() == 0 ){ return; } realLine = paramLine; auto vals = *REX::nuBlankSplitter( realLine ); @@ -65,8 +56,9 @@ namespace REX::teaw idStr = vals[2]; valStr = vals[3]; } - std::string_view getLine(){ return realLine; } - void outWrite( REX::paramBlock& srcBlock ){ + std::string_view rwgtVal::getLine(){ return realLine; } + bool rwgtVal::isAll(){ return (idStr == "all"); } + void rwgtVal::outWrite( REX::paramBlock& srcBlock ){ if ( isAll() ) { for( auto param : srcBlock.params ) @@ -89,13 +81,8 @@ namespace REX::teaw srcBlock.modded = true; return; } - }; - struct rwgtBlock { - public: - std::string_view name; - std::vector rwgtVals; - rwgtBlock( std::vector values = {}, std::string_view title = "" ) + rwgtBlock::rwgtBlock( std::vector values, std::string_view title) { name = title; rwgtVals.resize( values.size() ); @@ -104,12 +91,12 @@ namespace REX::teaw rwgtVals[k] = rwgtVal( values[k] ); } } - rwgtBlock( const std::vector& vals, std::string_view title = "" ) + rwgtBlock::rwgtBlock( const std::vector& vals, std::string_view title ) { name = title; rwgtVals = vals; } - std::string_view getBlock(){ + std::string_view rwgtBlock::getBlock(){ if( written ){ return runBlock; } runBlock = ""; for( auto val : rwgtVals ){ @@ -118,7 +105,7 @@ namespace REX::teaw written = true; return runBlock; } - void outWrite( REX::paramBlock& srcBlock, const std::map& blocks ) + void rwgtBlock::outWrite( REX::paramBlock& srcBlock, const std::map& blocks ) { for( auto parm : rwgtVals ) { @@ -127,18 +114,8 @@ namespace REX::teaw srcBlock.modded = true; return; } - protected: - std::string runBlock; - bool written = false; - }; - struct rwgtProc { - public: - std::vector rwgtParams; - std::string_view procString; - std::string_view rwgtName; - std::vector rwgtOpts; - void parse(){ + void rwgtProc::parse(){ std::vector blocks; std::vector>> params; auto procLines = *REX::nuLineSplitter( procString ); @@ -162,7 +139,7 @@ namespace REX::teaw rwgtParams.push_back( 
rwgtBlock( *params[k], blocks[k] ) ); } } - rwgtProc( REX::lesHouchesCard slhaSet, std::string_view rwgtSet = "", bool parseOnline = false ) + rwgtProc::rwgtProc( REX::lesHouchesCard slhaSet, std::string_view rwgtSet, bool parseOnline ) { if( rwgtSet == "" ){ return; } auto strtLi = rwgtSet.find( "\n", rwgtSet.find("launch") ) + 1; @@ -172,7 +149,7 @@ namespace REX::teaw procString = rwgtSet.substr( strtLi, endLi - strtLi ); if( parseOnline ){ parse(); } } - std::shared_ptr outWrite( const REX::lesHouchesCard& paramOrig ){ + std::shared_ptr rwgtProc::outWrite( const REX::lesHouchesCard& paramOrig ){ auto slhaOrig = std::make_shared( paramOrig ); std::map blockIds; for( size_t k = 0 ; k < slhaOrig->blocks.size() ; ++k ) @@ -184,18 +161,9 @@ namespace REX::teaw slhaOrig->modded = true; return slhaOrig; } - std::string_view comRunProc(){ return procString; } - }; + std::string_view rwgtProc::comRunProc(){ return procString; } - struct rwgtCard{ - public: - REX::lesHouchesCard slhaCard; - std::vector rwgtRuns; - std::vector rwgtProcs; - std::vector opts; - std::vector rwgtNames; - std::string_view srcCard; - void parse( bool parseOnline = false ) { + void rwgtCard::parse( bool parseOnline ) { auto strt = srcCard.find("launch"); while( auto commPos = srcCard.find_last_of("#", strt) > srcCard.find_last_of("\n", strt) ){ if( commPos == REX::npos ){ @@ -252,15 +220,15 @@ namespace REX::teaw } } } - rwgtCard( std::string_view reweight_card ){ + rwgtCard::rwgtCard( std::string_view reweight_card ){ srcCard = reweight_card; } - rwgtCard( std::string_view reweight_card, REX::lesHouchesCard slhaParams, bool parseOnline = false ){ + rwgtCard::rwgtCard( std::string_view reweight_card, REX::lesHouchesCard slhaParams, bool parseOnline ){ srcCard = reweight_card; slhaCard = slhaParams; if( parseOnline ){ parse( parseOnline ); } } - std::vector> writeCards( REX::lesHouchesCard& slhaOrig ){ + std::vector> rwgtCard::writeCards( REX::lesHouchesCard& slhaOrig ){ std::vector> cardVec; 
slhaOrig.parse(); cardVec.reserve( rwgtRuns.size() ); @@ -270,58 +238,54 @@ namespace REX::teaw } return cardVec; } - }; - struct rwgtCollection { - public: - void setRwgt( std::shared_ptr rwgts ){ + void rwgtCollection::setRwgt( std::shared_ptr rwgts ){ if( rwgtSet ){ return; } rwgtSets = rwgts; rwgtSet = true; } - void setRwgt( rwgtCard rwgts ){ + void rwgtCollection::setRwgt( rwgtCard rwgts ){ if( rwgtSet ){ return; } setRwgt( std::make_shared( rwgts ) ); rwgtSet = true; } - void setSlha( std::shared_ptr slha ){ + void rwgtCollection::setSlha( std::shared_ptr slha ){ if( slhaSet ){ return; } slhaParameters = slha; slhaParameters->parse(); slhaSet = true; } - void setSlha( REX::lesHouchesCard slha ){ + void rwgtCollection::setSlha( REX::lesHouchesCard slha ){ if( slhaSet ){ return; } setSlha( std::make_shared( slha ) ); slhaSet = true; } - void setLhe( std::shared_ptr lhe ){ + void rwgtCollection::setLhe( std::shared_ptr lhe ){ if( lheFileSet ){ return; } lheFile = lhe; lheFileSet = true; } - void setLhe( REX::lheNode& lhe ){ + void rwgtCollection::setLhe( REX::lheNode& lhe ){ if( lheFileSet ){ return; } setLhe( std::make_shared( lhe ) ); lheFileSet = true; } - void setLhe( std::string_view lhe_file ){ - if( lheFileSet ){ return; } + void rwgtCollection::setLhe( std::string_view lhe_file ){std::cout << "line 272\n"; + if( lheFileSet ){ return; } std::cout << "line 273\n"; //lheFile = REX::lheParser( lhe_file, strt, post ); - lheFile = std::make_shared( *lheFile ); - lheFileSet = true; - } - std::shared_ptr getRwgt(){ return rwgtSets; } - std::shared_ptr getSlha(){ return slhaParameters; } - std::shared_ptr getLhe(){ return lheFile; } - rwgtCollection(){ return; } - rwgtCollection( std::shared_ptr lhe, std::shared_ptr slha, std::shared_ptr rwgts ){ + lheFile = std::make_shared( REX::lheNode(lhe_file) ); std::cout << "line 275\n"; + lheFileSet = true; std::cout << "line 276\n"; + } + std::shared_ptr rwgtCollection::getRwgt(){ return rwgtSets; } + std::shared_ptr 
rwgtCollection::getSlha(){ return slhaParameters; } + std::shared_ptr rwgtCollection::getLhe(){ return lheFile; } + rwgtCollection::rwgtCollection(){ return; } + rwgtCollection::rwgtCollection( std::shared_ptr lhe, std::shared_ptr slha, std::shared_ptr rwgts ){ setLhe( lhe ); setSlha( slha ); setRwgt( rwgts ); } - protected: template - void setDoubles(Args&&... args){ + void rwgtCollection::setDoubles(Args&&... args){ if( lheFile == nullptr || rwgtSets == nullptr || slhaParameters == nullptr ) throw std::runtime_error( "One or more of the necessary files (SLHA parameter card, LHE event storage file, and MadGraph-format reweight card) have not been initialised." ); REX::lheRetDs returnBools; returnBools.xwgtup = true; returnBools.aqcdup = true; returnBools.pup = true; @@ -337,30 +301,18 @@ namespace REX::teaw momenta.push_back( vecOfVecs->at( 3*k + 2 ) ); } } - std::shared_ptr rwgtSets; - std::shared_ptr slhaParameters; - std::shared_ptr lheFile; - std::vector>> wgts; - std::vector>> gS; - std::vector>> momenta; - bool lheFileSet = false; - bool slhaSet = false; - bool rwgtSet = false; - REX::transLHE eventFile; - }; - struct rwgtFiles : rwgtCollection { - void setRwgtPath( std::string_view path ){ rwgtPath = path; } - void setSlhaPath( std::string_view path ){ slhaPath = path; } - void setLhePath( std::string_view path ){ lhePath = path; } - rwgtFiles() : rwgtCollection(){ return; } - rwgtFiles( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ) : rwgtCollection(){ + void rwgtFiles::setRwgtPath( std::string_view path ){ rwgtPath = path; } + void rwgtFiles::setSlhaPath( std::string_view path ){ slhaPath = path; } + void rwgtFiles::setLhePath( std::string_view path ){ lhePath = path; } + rwgtFiles::rwgtFiles() : rwgtCollection(){ return; } + rwgtFiles::rwgtFiles( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ) : rwgtCollection(){ setRwgtPath( reweight_card ); setSlhaPath( slha_card ); 
setLhePath( lhe_card ); } template - void initCards(Args&&... args){ + void rwgtFiles::initCards(Args&&... args){ if( rwgtPath == "" || slhaPath == "" || lhePath == "" ) throw std::runtime_error( "Paths to reweight card, parameter card, or LHE file have not been set" ); pullRwgt(); pullSlha(); pullLhe(); @@ -370,75 +322,55 @@ namespace REX::teaw setDoubles(args...); } template - void initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, Args&&... args ){ + void rwgtFiles::initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, Args&&... args ){ setLhePath( lhe_card ); setSlhaPath( slha_card ); setRwgtPath( reweight_card ); initCards(args...); } - protected: - void pullRwgt(){ + void rwgtFiles::pullRwgt(){ rewgtCard = REX::filePuller( rwgtPath ); } - void pullSlha(){ + void rwgtFiles::pullSlha(){ slhaCard = REX::filePuller( slhaPath ); } - void pullLhe(){ + void rwgtFiles::pullLhe(){ lheCard = REX::filePuller( lhePath ); + std::cout << *lheCard << "\n"; } - std::string rwgtPath; - std::string lhePath; - std::string slhaPath; - std::shared_ptr lheCard; - std::shared_ptr slhaCard; - std::shared_ptr rewgtCard; - }; - struct rwgtRunner : rwgtFiles{ - public: - void setMeEval( amplitude eval ){ + void rwgtRunner::setMeEval( amplitude eval ){ meEval = eval; meInit = true; ampCall nuEvals; nuEvals.insert( std::pair( *eventFile.subProcs[0]->process, eval ) ); meEvals = nuEvals; } - void setMeEvals( ampCall evals ){ meEvals = evals; meCompInit = true; } - void addMeEval( const REX::event& ev, const amplitude& eval ){ meEvals.insert( std::pair( ev, eval ) ); meCompInit = true; } - rwgtRunner() : rwgtFiles(){ return; } - rwgtRunner( rwgtFiles& rwgts ) : rwgtFiles( rwgts ){ return; } - rwgtRunner( rwgtFiles& rwgts, amplitude meCalc ) : rwgtFiles( rwgts ){ + void rwgtRunner::setMeEvals( ampCall evals ){ meEvals = evals; meCompInit = true; } + void rwgtRunner::addMeEval( const REX::event& ev, 
const amplitude& eval ){ meEvals.insert( std::pair( ev, eval ) ); meCompInit = true; } + rwgtRunner::rwgtRunner() : rwgtFiles(){ return; } + rwgtRunner::rwgtRunner( rwgtFiles& rwgts ) : rwgtFiles( rwgts ){ return; } + rwgtRunner::rwgtRunner( rwgtFiles& rwgts, amplitude meCalc ) : rwgtFiles( rwgts ){ meEval = meCalc; meInit = true; } - rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ) : rwgtFiles( rwgts ){ + rwgtRunner::rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ) : rwgtFiles( rwgts ){ meEvals = meCalcs; meCompInit = true; } - rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + rwgtRunner::rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, amplitude meCalc ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ meEval = meCalc; meInit = true; } - rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + rwgtRunner::rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, ampCall meCalcs ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ meEvals = meCalcs; meCompInit = true; } - bool oneME(){ return (meInit != meCompInit); } - bool singAmp(){ return (meInit && !meCompInit); } - protected: - bool meInit = false; - bool meCompInit = false; - bool meSet = false; - bool normWgtSet = false; - amplitude meEval; - ampCall meEvals; - std::vector>> initMEs; - std::vector>> meNormWgts; - std::shared_ptr> normWgt; - std::shared_ptr rwgtGroup; + bool rwgtRunner::oneME(){ return (meInit != meCompInit); } + bool rwgtRunner::singAmp(){ return (meInit && !meCompInit); } template - void setMEs(Args&&... args){ + void rwgtRunner::setMEs(Args&&... args){ initCards(args...); if( !oneME() ) throw std::runtime_error( "No or multiple function(s) for evaluating scattering amplitudes has been provided." 
); @@ -453,7 +385,7 @@ namespace REX::teaw //initMEs = {std::make_shared>( ins->begin(), ins->begin() + wgts[0]->size() )}; meSet = true; } - bool setParamCard( std::shared_ptr slhaParams ){ + bool rwgtRunner::setParamCard( std::shared_ptr slhaParams ){ if( slhaPath == "" ) throw std::runtime_error( "No parameter card path has been provided." ); if( slhaParameters == nullptr ) @@ -462,7 +394,7 @@ namespace REX::teaw throw std::runtime_error( "Failed to overwrite parameter card." ); return true; } - void setNormWgtsSingleME(){ + void rwgtRunner::setNormWgtsSingleME(){ //if( initMEs->size() != wgts[0]->size() ) // throw std::runtime_error( "Inconsistent number of events and event weights." ); meNormWgts = {std::make_shared>( wgts[0]->size() )}; @@ -471,7 +403,7 @@ namespace REX::teaw } normWgt = meNormWgts[0]; } - void setNormWgtsMultiME(){ + void rwgtRunner::setNormWgtsMultiME(){ meNormWgts = std::vector>>( initMEs.size() ); for( auto k = 0 ; k < wgts.size() ; ++k ){ meNormWgts[k] = std::make_shared>( wgts[k]->size() ); @@ -482,7 +414,7 @@ namespace REX::teaw normWgt = eventFile.vectorFlat( meNormWgts ); } template - void setNormWgts(Args&&... args){ + void rwgtRunner::setNormWgts(Args&&... args){ if( !oneME() ){ setMEs(args...); } //if( initMEs->size() != wgts[0]->size() ) // throw std::runtime_error( "Inconsistent number of events and event weights." ); @@ -494,7 +426,7 @@ namespace REX::teaw else { setNormWgtsMultiME(); } normWgtSet = true; } - bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId ){ + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId ){ if( !normWgtSet ) throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." 
); if( !setParamCard( slhaParams ) ) @@ -518,7 +450,7 @@ namespace REX::teaw lheFile->addWgt( 0, nuWgt ); return true; } - bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, std::string& id ){ + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, std::string& id ){ if( !normWgtSet ) throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); if( !setParamCard( slhaParams ) ) @@ -542,7 +474,7 @@ namespace REX::teaw lheFile->addWgt( 0, nuWgt ); return true; } - bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, REX::event& ev ){ + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, REX::event& ev ){ if( !normWgtSet ) throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); if( !setParamCard( slhaParams ) ) @@ -567,7 +499,7 @@ namespace REX::teaw lheFile->addWgt( 0, nuWgt ); return true; } - bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, std::string& id, REX::event& ev ){ if( !normWgtSet ) throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); @@ -592,14 +524,13 @@ namespace REX::teaw lheFile->addWgt( 0, nuWgt ); return true; } - bool lheFileWriter( std::shared_ptr lheFile, std::string outputDir = "rwgt_evts.lhe" ){ + bool rwgtRunner::lheFileWriter( std::shared_ptr lheFile, std::string outputDir ){ bool writeSuccess = REX::filePusher( outputDir, *lheFile->nodeWriter() ); if( !writeSuccess ) throw std::runtime_error( "Failed to write LHE file." 
); return true; } - public: - void runRwgt( const std::string& output ){ + void rwgtRunner::runRwgt( const std::string& output ){ setMEs(); setNormWgts(); rwgtGroup = std::make_shared(); @@ -613,7 +544,10 @@ namespace REX::teaw REX::filePusher( slhaPath, *slhaCard ); std::cout << "\nReweighting done.\n"; } - }; + + void rwgtRun( rwgtRunner& rwgt, const std::string& path ){ + rwgt.runRwgt( path ); + } } -#endif \ No newline at end of file +#endif diff --git a/tools/REX/tester.cpp b/tools/REX/tester.cpp index d7d8493c25..9a795d1a7e 100644 --- a/tools/REX/tester.cpp +++ b/tools/REX/tester.cpp @@ -19,15 +19,26 @@ std::shared_ptr> sorterFunc(std::string_view dummy, std::vec int main( int argc, char* argv[] ){ std::string lheFilePath; + std::string rwgtCardPath; + std::string outputPath; + std::string slhaPath; // READ COMMAND LINE ARGUMENTS - for( int arg = 0; arg < argc; arg++ ) +for( int i = 1; i < argc; i++ ) { - auto currArg = std::string( argv[arg] ); + auto currArg = std::string( argv[i] ); if( currArg.substr(0,9) == "--lhefile" || currArg.substr(0,4) == "-lhe" ) { lheFilePath = currArg.substr( currArg.find( "=" ) + 1 ); } + else if( currArg.substr(0,10) == "--rwgtcard" || currArg.substr(0,5) == "-rwgt" ) + { + rwgtCardPath = currArg.substr( currArg.find( "=" ) + 1 ); + } else if( currArg.substr(0,8) == "--output" || currArg.substr(0,4) == "-out" ){ + outputPath = currArg.substr( currArg.find( "=" ) + 1 ); + } else if (currArg.substr(0,12) == "--param_card" || currArg.substr(0,5) == "-slha" ){ + slhaPath = currArg.substr( currArg.find( "=" ) + 1 ); + } } @@ -57,6 +68,10 @@ int main( int argc, char* argv[] ){ } std::cout << evsVals->size() << "\n"; std::cout << siz << "\n"; + + REX::teaw::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); + fileCol.initCards(); + return 0; } \ No newline at end of file diff --git a/tools/REX/unweighted_events.lhe b/tools/REX/unweighted_events.lhe deleted file mode 100644 index 6b05b56584..0000000000 --- 
a/tools/REX/unweighted_events.lhe +++ /dev/null @@ -1,1870 +0,0 @@ - -
- - -3.5.2 - - - 3j -output -]]> - - -#********************************************************************* -# MadGraph/MadEvent * -# http://madgraph.hep.uiuc.edu * -# * -# proc_card.dat * -#********************************************************************* -# * -# This Files is generated by MADGRAPH 5 * -# * -# WARNING: This Files is generated for MADEVENT (compatibility issue)* -# This files is NOT a valid MG4 proc_card.dat * -# Running this in MG4 will NEVER reproduce the result of MG5* -# * -#********************************************************************* -#********************************************************************* -# Process(es) requested : mg2 input * -#********************************************************************* -# Begin PROCESS # This is TAG. Do not modify this line -p p > 3j #Process -# Be carefull the coupling are here in MG5 convention - -end_coup # End the couplings input - -done # this tells MG there are no more procs -# End PROCESS # This is TAG. Do not modify this line -#********************************************************************* -# Model information * -#********************************************************************* -# Begin MODEL # This is TAG. Do not modify this line -sm -# End MODEL # This is TAG. Do not modify this line -#********************************************************************* -# Start multiparticle definitions * -#********************************************************************* -# Begin MULTIPARTICLES # This is TAG. Do not modify this line - -# End MULTIPARTICLES # This is TAG. 
Do not modify this line - - - - - -###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 #### -###################################################################### -################################### -## INFORMATION FOR MASS -################################### -BLOCK MASS # - 5 4.700000e+00 # mb - 6 1.730000e+02 # mt - 15 1.777000e+00 # mta - 23 9.118800e+01 # mz - 25 1.250000e+02 # mh - 1 0.000000e+00 # d : 0.0 - 2 0.000000e+00 # u : 0.0 - 3 0.000000e+00 # s : 0.0 - 4 0.000000e+00 # c : 0.0 - 11 0.000000e+00 # e- : 0.0 - 12 0.000000e+00 # ve : 0.0 - 13 0.000000e+00 # mu- : 0.0 - 14 0.000000e+00 # vm : 0.0 - 16 0.000000e+00 # vt : 0.0 - 21 0.000000e+00 # g : 0.0 - 22 0.000000e+00 # a : 0.0 - 24 8.041900e+01 # w+ : cmath.sqrt(mz__exp__2/2. + cmath.sqrt(mz__exp__4/4. - (aew*cmath.pi*mz__exp__2)/(gf*sqrt__2))) -################################### -## INFORMATION FOR SMINPUTS -################################### -BLOCK SMINPUTS # - 1 1.325070e+02 # aewm1 - 2 1.166390e-05 # gf - 3 1.300000e-01 # as (note that parameter not used if you use a pdf set) -################################### -## INFORMATION FOR YUKAWA -################################### -BLOCK YUKAWA # - 5 4.700000e+00 # ymb - 6 1.730000e+02 # ymt - 15 1.777000e+00 # ymtau -################################### -## INFORMATION FOR DECAY -################################### -DECAY 6 1.491500e+00 # wt -DECAY 23 2.441404e+00 # wz -DECAY 24 2.047600e+00 # ww -DECAY 25 6.382339e-03 # wh -DECAY 1 0.000000e+00 # d : 0.0 -DECAY 2 0.000000e+00 # u : 0.0 -DECAY 3 0.000000e+00 # s : 0.0 -DECAY 4 0.000000e+00 # c : 0.0 -DECAY 5 0.000000e+00 # b : 0.0 -DECAY 11 0.000000e+00 # e- : 0.0 -DECAY 12 0.000000e+00 # ve : 0.0 -DECAY 13 0.000000e+00 # mu- : 0.0 -DECAY 14 0.000000e+00 # vm : 0.0 -DECAY 15 0.000000e+00 # ta- : 0.0 -DECAY 16 0.000000e+00 # vt : 0.0 -DECAY 21 0.000000e+00 # g : 0.0 -DECAY 22 0.000000e+00 # a : 0.0 - - -# Number of Events : 100 -# 
Integrated weight (pb) : 66372287.22200001 - -
- -2212 2212 6.500000e+03 6.500000e+03 0 0 247000 247000 -4 1 -6.637229e+07 1.268397e+06 6.637229e+07 1 -please cite 1405.0301 - - - 5 1 +6.6372287e+07 4.60140800e+01 7.54677100e-03 1.46810800e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.8788806474e+02 1.8788806474e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -3.0556910363e+01 3.0556910363e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -1.0298827890e+01 -4.1053633424e+01 +8.3051244550e+01 9.3214676391e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 +1.7276524932e+01 -1.2156784273e+01 -1.1495329061e+01 2.4050120744e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 -6.9776970424e+00 +5.3210417698e+01 +8.5775238884e+01 1.0118017797e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.46014081E+02 -0 - 1 21 0.28905856E-01 0.46014081E+02 - 1 21 0.47010632E-02 0.46014081E+02 - 0.31830845E+06 - - - - 5 1 +6.6372287e+07 3.25558900e+01 7.54677100e-03 1.57144200e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.7974513959e+02 2.7974513959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -5.0115268359e+01 5.0115268359e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +1.1282244936e+00 +2.2858622638e+01 +3.8461797268e-02 2.2886480698e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +2.9194898468e+00 -4.2605139346e+01 -2.6389333299e+01 5.0200779193e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 503 -4.0477143403e+00 +1.9746516708e+01 +2.5598074273e+02 2.5677314806e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.32555892E+02 -0 - 1 21 0.43037713E-01 0.32555892E+02 - 1 21 0.77100414E-02 0.32555892E+02 - 0.65037882E+05 - - - - 5 1 +6.6372287e+07 3.05908400e+01 7.54677100e-03 1.59164800e-01 - 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +4.5729905700e+02 4.5729905700e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 
- 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -6.3253912877e+02 6.3253912877e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 +1.1420284484e+00 +2.8694844708e+01 +1.2159916921e+02 1.2494421273e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -2.2459074491e+01 -2.0815319355e+01 -6.3010778840e+02 6.3085141876e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 501 0 +2.1317046043e+01 -7.8795253530e+00 +3.3326854742e+02 3.3404255428e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.30590836E+02 -0 - 1 21 0.97313711E-01 0.30590836E+02 - 1 2 0.70353702E-01 0.30590836E+02 - 0.91658669E+02 - - - - 5 1 +6.6372287e+07 1.24970000e+02 7.54677100e-03 1.23511600e-01 - 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +6.4054339688e+02 6.4054339688e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -3.3928351011e+01 3.3928351011e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 505 -3.6546574781e+01 +7.3293152180e+00 +5.3085336864e+01 6.4864658942e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 503 -8.4789922053e+01 +1.0871076160e+01 +6.9212770934e+01 1.0999053977e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 +1.2133649683e+02 -1.8200391378e+01 +4.8431693807e+02 4.9961654918e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.12497005E+03 -0 - 1 21 0.98545129E-01 0.12497005E+03 - 1 21 0.52197468E-02 0.12497005E+03 - 0.21698561E+05 - - - - 5 1 +6.6372287e+07 2.09917500e+01 7.54677100e-03 1.72629600e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.9393491974e+01 2.9393491974e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -5.0612634540e+01 5.0612634540e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -2.2361493101e+01 -8.0134576492e+00 -2.5339678876e+01 3.4732566890e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 +5.3440837509e+00 +2.0304167068e+01 
+1.0307030697e+01 2.3389170854e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 503 +1.7017409350e+01 -1.2290709419e+01 -6.1864943863e+00 2.1884388769e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.20991755E+02 -0 - 1 21 0.45220758E-02 0.20991755E+02 - 1 21 0.77865590E-02 0.20991755E+02 - 0.28846636E+07 - - - - 5 1 +6.6372287e+07 2.01883800e+01 7.54677100e-03 1.74160800e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.9616331394e+01 2.9616331394e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -2.1809592212e+02 2.1809592212e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +1.4332017667e+01 -1.5898231494e+01 -1.1283261663e+02 1.1484493837e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 503 +5.5127621513e+00 +1.9607065270e+01 -9.9531289229e+01 1.0159382408e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 -1.9844779818e+01 -3.7088337755e+00 +2.3884315130e+01 3.1273491063e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.20188381E+02 -0 - 1 21 0.45563588E-02 0.20188381E+02 - 1 21 0.33553218E-01 0.20188381E+02 - 0.23199633E+06 - - - - 5 1 +6.6372287e+07 2.83114100e+01 7.54677100e-03 1.61754100e-01 - -3 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +2.8261154183e+01 2.8261154183e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -6.5070264344e+01 6.5070264344e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +7.3144338996e+00 +3.7539358060e+01 -8.3663539266e+00 3.9149715515e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -1.5538451858e+01 -1.6013356486e+01 -1.8894895213e+01 2.9238470159e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - -3 1 1 2 0 504 +8.2240179584e+00 -2.1526001574e+01 -9.5478610208e+00 2.4943232854e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.28311412E+02 -0 - 1 21 0.10010810E-01 0.28311412E+02 - 1 -3 0.43478699E-02 0.28311412E+02 - 0.75606750E+05 - - - - 5 1 
+6.6372287e+07 2.50484100e+01 7.54677100e-03 1.66030800e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +4.5278855952e+02 4.5278855952e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.3454632319e+00 3.3454632319e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 505 -1.5102249073e+01 -2.7392413109e+01 +1.7894067235e+02 1.8165402953e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 +2.1057931191e+01 +3.9670307239e+00 +9.9776507011e+01 1.0205158083e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 -5.9556821180e+00 +2.3425382385e+01 +1.7072591693e+02 1.7242841240e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.25048406E+02 -0 - 1 21 0.69659730E-01 0.25048406E+02 - 1 21 0.51468701E-03 0.25048406E+02 - 0.16161844E+07 - - - - 5 1 +6.6372287e+07 6.54738600e+01 7.54677100e-03 1.37619800e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +2.6663794394e+01 2.6663794394e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -2.5265738923e+02 2.5265738923e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 -7.6137868107e+00 +4.2439462980e+01 -1.6255497692e+02 1.6817609310e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -1.7587948234e+01 +1.0621679064e+01 -4.5177420050e+01 4.9630185085e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 +2.5201735045e+01 -5.3061142044e+01 -1.8261197867e+01 6.1514905444e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.65473858E+02 -0 - 1 21 0.41021221E-02 0.65473858E+02 - 1 1 0.38870368E-01 0.65473858E+02 - 0.41073273E+05 - - - - 5 1 +6.6372287e+07 4.71053000e+01 7.54677100e-03 1.46161100e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.0723487937e+02 1.0723487937e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.2434583342e+02 1.2434583342e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 
504 -1.5074548460e+01 +4.4668996332e+01 +7.0907382043e+01 8.5149386802e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 -3.7446327852e+01 -2.8577640944e+01 -7.7213750461e+01 9.0448174619e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 +5.2520876312e+01 -1.6091355388e+01 -1.0804585631e+01 5.5983151371e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.47105297E+02 -0 - 1 21 0.16497674E-01 0.47105297E+02 - 1 21 0.19130128E-01 0.47105297E+02 - 0.81247298E+05 - - - - 5 1 +6.6372287e+07 4.77488600e+01 7.54677100e-03 1.45787600e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +1.3351097238e+02 1.3351097238e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -2.1959914093e+03 2.1959914093e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -2.6534412892e+01 +2.0887502154e+01 -2.0204850067e+03 2.0207671872e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +2.7723341226e+01 -6.8071401227e+01 -1.6302498162e+02 1.7882797305e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 504 -1.1889283334e+00 +4.7183899072e+01 +1.2102955134e+02 1.2990722143e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.47748865E+02 -0 - 1 21 0.20540149E-01 0.47748865E+02 - 1 21 0.33784484E+00 0.47748865E+02 - 0.69049208E+02 - - - - 5 1 +6.6372287e+07 5.17648700e+01 7.54677100e-03 1.43604800e-01 - -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +7.0572435077e+02 7.0572435077e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -8.6069634546e+00 8.6069634546e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +2.3775888591e+01 -5.0832360721e+00 +1.9988986553e+01 3.1475256166e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +2.1064391322e+01 -6.4983212153e+00 +5.8809348241e+01 6.2805065090e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - -2 1 1 2 0 502 -4.4840279913e+01 +1.1581557287e+01 +6.1831905252e+02 
6.2005099297e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.51764867E+02 -0 - 1 21 0.13241488E-02 0.51764867E+02 - 1 -2 0.10857293E+00 0.51764867E+02 - 0.12387408E+05 - - - - 5 1 +6.6372287e+07 2.68215700e+01 7.54677100e-03 1.63613700e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +4.7995183998e+01 4.7995183998e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -4.6755507222e+02 4.6755507222e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 -5.6572765501e+00 -2.1816941248e+01 +1.1968309353e+01 2.5519093482e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -2.3663698968e+01 +1.2778475361e+01 -4.5222293332e+02 4.5302189959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 503 +2.9320975518e+01 +9.0384658865e+00 +2.0694735751e+01 3.7009263148e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.26821571E+02 -0 - 1 21 0.73838749E-02 0.26821571E+02 - 1 21 0.71931546E-01 0.26821571E+02 - 0.24837378E+05 - - - - 5 1 +6.6372287e+07 4.31543000e+01 7.54677100e-03 1.48620400e-01 - 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +7.1563261884e+02 7.1563261884e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.4644760996e+01 1.4644760996e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +1.2645094462e+01 -4.3267730831e+01 +6.6587442685e+02 6.6739849211e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 505 -1.7724111495e+01 +1.5186816241e+01 +1.2556432303e+01 2.6503726304e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 503 +5.0790170329e+00 +2.8080914591e+01 +2.2556998696e+01 3.6375161422e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.43154296E+02 -0 - 1 21 0.11009730E+00 0.43154296E+02 - 1 21 0.22530407E-02 0.43154296E+02 - 0.66560154E+05 - - - - 5 1 +6.6372287e+07 4.37774800e+01 7.54677100e-03 1.48212100e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.6494856549e+02 
2.6494856549e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -4.4840936233e+01 4.4840936233e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 -2.3919572741e+01 -2.1836315356e+01 -2.8033165864e+01 4.2834904188e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +2.0968394452e+01 -2.8904789122e+01 +4.4445065176e+01 5.7013368771e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 +2.9511782884e+00 +5.0741104479e+01 +2.0369572994e+02 2.0994122876e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.43777479E+02 -0 - 1 21 0.40761317E-01 0.43777479E+02 - 1 21 0.68986058E-02 0.43777479E+02 - 0.88070658E+05 - - - - 5 1 +6.6372287e+07 3.19042100e+01 7.54677100e-03 1.57794600e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +3.5412150098e+01 3.5412150098e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -7.5668427371e+02 7.5668427371e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 +4.2078004819e+00 -3.1373137318e+01 -7.1649764593e+02 7.1719652534e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 +1.7730843460e+01 -1.5141321578e+01 -1.1865670592e+00 2.3346313849e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 503 -2.1938643942e+01 +4.6514458896e+01 -3.5879106177e+00 5.1553584618e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.31904206E+02 -0 - 1 21 0.54480234E-02 0.31904206E+02 - 1 21 0.11641296E+00 0.31904206E+02 - 0.14302972E+05 - - - - 5 1 +6.6372287e+07 2.62752100e+01 7.54677100e-03 1.64333300e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +7.3680834147e+01 7.3680834147e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -4.3076387169e+02 4.3076387169e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +5.3624128570e+00 +1.9370699338e+01 -1.8582414279e+02 1.8690797605e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 
504 502 +1.5600742238e+01 -2.1293512835e+01 +7.0284358620e+01 7.5077878991e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -2.0963155095e+01 +1.9228134974e+00 -2.4154325337e+02 2.4245885080e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.26275207E+02 -0 - 1 21 0.11335513E-01 0.26275207E+02 - 1 2 0.66271366E-01 0.26275207E+02 - 0.56080712E+04 - - - - 5 1 +6.6372287e+07 4.55308900e+01 7.54677100e-03 1.47105400e-01 - 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +2.1459625930e+03 2.1459625930e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -5.5418446222e+00 5.5418446222e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +3.4560110742e+01 -1.7822362191e+01 +1.6729667012e+02 1.7175626242e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +8.6373215770e+00 +3.7927160061e+01 +1.2285930833e+02 1.2887002149e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -4.3197432319e+01 -2.0104797870e+01 +1.8502647699e+03 1.8508781537e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.45530891E+02 -0 - 1 21 0.85259319E-03 0.45530891E+02 - 1 2 0.33014743E+00 0.45530891E+02 - 0.37889394E+05 - - - - 5 1 +6.6372287e+07 3.49649300e+01 7.54677100e-03 1.54891200e-01 - -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +8.0100600886e+00 8.0100600886e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 -1 0 0 503 0 +0.0000000000e+00 -0.0000000000e+00 -1.6748462249e+03 1.6748462249e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 -2.8388621465e+01 +1.5105638110e+01 -1.9595061691e+02 1.9857174623e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 502 0 +5.9035355306e+00 -3.7141587409e+01 -1.4389449039e+03 1.4394362736e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -2 1 1 2 0 502 +2.2485085934e+01 +2.2035949299e+01 -3.1940644021e+01 4.4848265200e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.34964932E+02 -0 - 1 2 0.25766864E+00 0.34964932E+02 - 1 -2 0.12323170E-02 0.34964932E+02 - 
0.15263237E+04 - - - - 5 1 +6.6372287e+07 3.04072400e+01 7.54677100e-03 1.59363000e-01 - 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +2.3031354025e+01 2.3031354025e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.8224559169e+02 1.8224559169e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +2.5754317368e+01 -4.7408923451e+01 -9.5689678327e+01 1.0985174293e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -1.1009373966e+01 +2.4882397341e+01 -8.6110655342e-01 2.7222812438e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 503 -1.4744943402e+01 +2.2526526110e+01 -6.2663452789e+01 6.8202390354e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.30407236E+02 -0 - 1 21 0.35432851E-02 0.30407236E+02 - 1 21 0.28037785E-01 0.30407236E+02 - 0.50703811E+06 - - - - 5 1 +6.6372287e+07 2.46316000e+01 7.54677100e-03 1.66635000e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +8.0590561410e+01 8.0590561410e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -6.2809489236e+02 6.2809489236e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -1.7815991771e+01 +2.0155583443e+01 -8.5520591269e+00 2.8227554305e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 -7.5126314979e+00 -1.9813252642e+01 -6.0724710539e+02 6.0761669795e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 503 +2.5328623269e+01 -3.4233080119e-01 +6.8294833568e+01 7.2841201522e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.24631597E+02 -0 - 1 21 0.96629979E-01 0.24631597E+02 - 1 -1 0.12398548E-01 0.24631597E+02 - 0.37172940E+03 - - - - 5 1 +6.6372287e+07 2.48386400e+01 7.54677100e-03 1.66333000e-01 - 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +3.4621419117e+02 3.4621419117e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -1.8471995540e+01 1.8471995540e+01 0.0000000000e+00 
0.0000e+00 -1.0000e+00 - 21 1 1 2 504 503 +2.4946999999e+01 +1.5290445725e+00 +2.8662250343e+02 2.8771018449e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 501 0 -1.3234624373e+00 -2.4231607655e+01 -1.6739991815e-01 2.4268300005e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - -2 1 1 2 0 502 -2.3623537562e+01 +2.2702563083e+01 +4.1287092120e+01 5.2707702219e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.24838643E+02 -0 - 1 21 0.53263717E-01 0.24838643E+02 - 1 21 0.28418457E-02 0.24838643E+02 - 0.20767655E+06 - - - - 5 1 +6.6372287e+07 3.39483100e+01 7.54677100e-03 1.55814300e-01 - -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +2.5448573077e+01 2.5448573077e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 -1 0 0 503 0 +0.0000000000e+00 -0.0000000000e+00 -1.0820396951e+03 1.0820396951e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +4.5361883356e+01 +2.5711927708e+01 -5.3689272592e+02 5.3941876389e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 502 0 -1.6783464189e+01 -2.3932337766e+01 -5.3486766152e+02 5.3566580701e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -2 1 1 2 0 502 -2.8578419167e+01 -1.7795899428e+00 +1.5169265433e+01 3.2403697261e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.33948308E+02 -0 - 1 2 0.16646767E+00 0.33948308E+02 - 1 -2 0.39151646E-02 0.33948308E+02 - 0.59650818E+03 - - - - 5 1 +6.6372287e+07 4.00572800e+01 7.54677100e-03 1.50779000e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +3.2066229463e+01 3.2066229463e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.0083738526e+02 3.0083738526e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 +3.4238964599e+01 -3.1475020468e+00 -2.1121471239e+02 2.1399501909e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 -1.6612873637e+01 +3.9603631259e+01 +6.1874354643e+00 4.3390316167e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 502 -1.7626090962e+01 -3.6456129213e+01 
-6.3743878874e+01 7.5518279467e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.40057279E+02 -0 - 1 21 0.49332658E-02 0.40057279E+02 - 1 21 0.46282677E-01 0.40057279E+02 - 0.11855536E+06 - - - - 5 1 +6.6372287e+07 4.37051900e+01 7.54677100e-03 1.48259100e-01 - 2 -1 0 0 502 0 +0.0000000000e+00 +0.0000000000e+00 +1.7110304904e+03 1.7110304904e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -2.0783382913e+01 2.0783382913e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -1.4418127206e+01 +2.0747890384e+01 +7.9570356529e+01 8.3485321978e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 +3.6111610870e+01 -3.0183395268e+01 +1.6215922235e+03 1.6222750769e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 1 1 2 502 0 -2.1693483664e+01 +9.4355048838e+00 -1.0915472595e+01 2.6053474392e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.43705192E+02 -0 - 1 2 0.26323535E+00 0.43705192E+02 - 1 1 0.31974449E-02 0.43705192E+02 - 0.43186860E+03 - - - - 5 1 +6.6372287e+07 3.25233300e+01 7.54677100e-03 1.57176200e-01 - 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +2.1530668898e+01 2.1530668898e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -5.8911499310e+02 5.8911499310e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 +3.1914103860e+01 -3.6894354070e+01 -1.3312376381e+02 1.4178025208e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 -1.0765125773e+01 +2.9189748902e+01 +2.6051922163e+00 3.1220448433e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -2.1148978087e+01 +7.7046051684e+00 -4.3706575261e+02 4.3764496149e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.32523330E+02 -0 - 1 21 0.33124105E-02 0.32523330E+02 - 1 2 0.90633079E-01 0.32523330E+02 - 0.30633976E+05 - - - - 5 1 +6.6372287e+07 3.61852100e+01 7.54677100e-03 1.53832100e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +4.3002025114e+01 
4.3002025114e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.3404139915e+02 3.3404139915e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 505 +5.4545664238e+00 -2.1319807632e+01 -2.9481524350e+02 2.9563544153e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +3.1100319594e+01 -8.7134369374e+00 +9.7127797801e+00 3.3726724614e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 -3.6554886018e+01 +3.0033244569e+01 -5.9369103163e+00 4.7681258113e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.36185209E+02 -0 - 1 21 0.66156963E-02 0.36185209E+02 - 1 21 0.51390983E-01 0.36185209E+02 - 0.59410236E+05 - - - - 5 1 +6.6372287e+07 2.68952500e+01 7.54677100e-03 1.63518300e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +2.8770752959e+02 2.8770752959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -2.6931152162e+02 2.6931152162e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +2.0961573832e+01 +2.3688081609e+00 -1.7971774862e+02 1.8095156257e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 505 -3.7349663467e+00 -2.3130388947e+01 -8.6786083310e+01 8.9893209547e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 -1.7226607485e+01 +2.0761580786e+01 +2.8489983990e+02 2.8617427909e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.26895249E+02 -0 - 1 21 0.44262697E-01 0.26895249E+02 - 1 21 0.41432541E-01 0.26895249E+02 - 0.32158164E+04 - - - - 5 1 +6.6372287e+07 2.51016900e+01 7.54677100e-03 1.65954600e-01 - 2 -1 0 0 504 0 -0.0000000000e+00 +0.0000000000e+00 +6.4617848855e+01 6.4617848855e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -4.5852280566e+01 4.5852280566e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +1.2633388858e+01 +1.7296317379e+01 -2.9732559349e+01 3.6644101767e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 
503 +1.3130515800e+01 -2.4553339855e+01 +1.7570358035e+01 3.2924070597e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -2.5763904658e+01 +7.2570224764e+00 +3.0927769604e+01 4.0901957057e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.25101687E+02 -0 - 1 21 0.70541970E-02 0.25101687E+02 - 1 2 0.99412075E-02 0.25101687E+02 - 0.89083039E+05 - - - - 5 1 +6.6372287e+07 2.65415900e+01 7.54677100e-03 1.63979800e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +7.3037786153e+01 7.3037786153e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -8.2017257442e+01 8.2017257442e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 505 +1.8697084487e+01 -8.2924898880e+00 -5.9625078565e+01 6.3035675222e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +6.9951105287e+00 +2.5911255642e+01 +5.4380851637e+01 6.0643233464e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 -2.5692195015e+01 -1.7618765754e+01 -3.7352443596e+00 3.1376134909e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.26541591E+02 -0 - 1 21 0.11236582E-01 0.26541591E+02 - 1 21 0.12618040E-01 0.26541591E+02 - 0.30903565E+06 - - - - 5 1 +6.6372287e+07 2.27761200e+01 7.54677100e-03 1.69516500e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +8.6173848945e+01 8.6173848945e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -6.3585034087e+01 6.3585034087e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -2.7032116927e+01 +8.2973252626e+00 +1.6307827832e+01 3.2642398819e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +1.9247818195e+01 +1.2926007751e+01 -5.0735466398e+01 5.5782145282e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 503 +7.7842987316e+00 -2.1223333014e+01 +5.7016453425e+01 6.1334338931e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.22776118E+02 -0 - 1 21 0.13257515E-01 0.22776118E+02 - 1 21 0.97823130E-02 
0.22776118E+02 - 0.35046139E+06 - - - - 5 1 +6.6372287e+07 3.80456700e+01 7.54677100e-03 1.52310600e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.3757684306e+01 1.3757684306e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.9694458511e+02 3.9694458511e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 505 -1.4438846203e+01 -1.7702498483e+01 -8.3619869477e+01 8.6684146783e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +1.4547368666e+01 -2.2261281669e+01 -2.7669903846e+02 2.7797400684e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 -1.0852246356e-01 +3.9963780152e+01 -2.2867992874e+01 4.6044115794e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.38045671E+02 -0 - 1 21 0.21165667E-02 0.38045671E+02 - 1 21 0.61068400E-01 0.38045671E+02 - 0.26069372E+06 - - - - 5 1 +6.6372287e+07 4.59126200e+01 7.54677100e-03 1.46872300e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +2.0311609080e+03 2.0311609080e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -2.4721053331e+01 2.4721053331e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +5.4574055215e+00 +4.5277462040e+01 +1.8656515221e+03 1.8662088398e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +3.1344441190e+01 +1.9716704689e+01 +9.5251287709e+01 1.0219603832e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 -3.6801846712e+01 -6.4994166729e+01 +4.5537044828e+01 8.7477083181e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.45912622E+02 -0 - 1 21 0.31248616E+00 0.45912622E+02 - 1 21 0.38032406E-02 0.45912622E+02 - 0.16431983E+04 - - - - 5 1 +6.6372287e+07 3.23160000e+01 7.54677100e-03 1.57381400e-01 - 1 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +5.8873030751e+02 5.8873030751e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -2.0960881505e+01 2.0960881505e+01 
0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 +1.2271827274e+01 +2.2294515262e+01 +1.4647225131e+02 1.4866661885e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +2.0219290079e+01 -9.7210766525e-02 -1.3589093295e+01 2.4361703508e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 1 1 2 501 0 -3.2491117354e+01 -2.2197304495e+01 +4.3488626799e+02 4.3666286666e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.32316004E+02 -0 - 1 21 0.32247515E-02 0.32316004E+02 - 1 1 0.90573881E-01 0.32316004E+02 - 0.20132875E+05 - - - - 5 1 +6.6372287e+07 4.71162200e+01 7.54677100e-03 1.46154700e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +3.5698751231e+01 3.5698751231e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.5024197302e+02 1.5024197302e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 505 -1.8393329399e+01 -2.0730266036e+01 +1.5134275211e+01 3.1576966011e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +3.0318818959e+01 +4.1981229081e+01 -1.1447233348e+02 1.2564063639e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 -1.1925489561e+01 -2.1250963045e+01 -1.5205163517e+01 2.8723121856e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.47116224E+02 -0 - 1 21 0.54921154E-02 0.47116224E+02 - 1 21 0.23114150E-01 0.47116224E+02 - 0.37322948E+06 - - - - 5 1 +6.6372287e+07 5.00477800e+01 7.54677100e-03 1.44508400e-01 - 2 -1 0 0 503 0 +0.0000000000e+00 +0.0000000000e+00 +3.8911714874e+02 3.8911714874e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 502 0 -0.0000000000e+00 -0.0000000000e+00 -3.0237812812e+02 3.0237812812e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +2.7035206944e+01 +1.1996257553e+01 +1.2886486117e+02 1.3221560064e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -2.2748413249e+01 -4.4788353833e+01 -2.9753120134e+02 3.0174211293e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 502 0 -4.2867936948e+00 
+3.2792096280e+01 +2.5540536079e+02 2.5753756328e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.50047775E+02 -0 - 1 2 0.59864176E-01 0.50047775E+02 - 1 2 0.46519712E-01 0.50047775E+02 - 0.13730376E+03 - - - - 5 1 +6.6372287e+07 2.97093000e+01 7.54677100e-03 1.60132400e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.1854806368e+02 1.1854806368e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -2.8789738375e+02 2.8789738375e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -2.1808462259e+01 +1.9091314842e+01 +2.7760747717e+01 4.0134105724e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +5.0913227713e+01 -2.3763320633e+01 +7.4385366719e+01 9.3220356927e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 -2.9104765455e+01 +4.6720057908e+00 -2.7149543450e+02 2.7309098477e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.29709305E+02 -0 - 1 21 0.18238164E-01 0.29709305E+02 - 1 1 0.44291905E-01 0.29709305E+02 - 0.26942502E+04 - - - - 5 1 +6.6372287e+07 2.76081800e+01 7.54677100e-03 1.62613900e-01 - 1 -1 0 0 501 0 +0.0000000000e+00 +0.0000000000e+00 +3.5918383131e+02 3.5918383131e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -4 -1 0 0 0 501 -0.0000000000e+00 -0.0000000000e+00 -9.9599640123e+00 9.9599640123e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -1.0226056517e+01 -1.7566903028e+01 +5.4113018863e+01 5.7804732716e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 502 0 -1.8623508510e+01 -1.7954792984e+01 +2.3170430073e+02 2.3314393117e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -4 1 1 2 0 503 +2.8849565027e+01 +3.5521696012e+01 +6.3406547700e+01 7.8195131441e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.27608180E+02 -0 - 1 1 0.55259041E-01 0.27608180E+02 - 1 -4 0.15323024E-02 0.27608180E+02 - 0.31455192E+04 - - - - 5 1 +6.6372287e+07 3.28240500e+01 7.54677100e-03 1.56881800e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 
+5.7881981423e+00 5.7881981423e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.2709795693e+03 1.2709795693e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -2.9221921883e+01 +8.0360733545e+00 -5.6807285970e+02 5.6888071959e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +1.6963846300e+01 -2.9307832371e+01 -5.0500567831e+01 6.0803194577e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 503 +1.2258075583e+01 +2.1271759017e+01 -6.4661794361e+02 6.4708385326e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.32824047E+02 -0 - 1 21 0.89049153E-03 0.32824047E+02 - 1 21 0.19553543E+00 0.32824047E+02 - 0.68369125E+05 - - - - 5 1 +6.6372287e+07 2.96747300e+01 7.54677100e-03 1.60171200e-01 - 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +4.0441816137e+01 4.0441816137e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.0683900114e+02 1.0683900114e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -2.6554357024e+01 +3.2973328106e+01 +7.1332905671e+00 4.2933181546e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 +2.0427248287e+01 -1.1559769425e+01 +3.8860694556e+00 2.3790802373e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 501 0 +6.1271087369e+00 -2.1413558681e+01 -7.7416545026e+01 8.0556833358e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.29674731E+02 -0 - 1 21 0.62218180E-02 0.29674731E+02 - 1 2 0.16436769E-01 0.29674731E+02 - 0.63902127E+05 - - - - 5 1 +6.6372287e+07 2.77849600e+01 7.54677100e-03 1.62394800e-01 - -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +5.5901367143e+01 5.5901367143e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 4 -1 0 0 501 0 +0.0000000000e+00 -0.0000000000e+00 -1.6721775392e+02 1.6721775392e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -1.7214919673e+01 +1.5725661972e+01 -4.6993551561e+00 2.3785160136e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 4 
1 1 2 502 0 -7.1368635003e+00 -2.7166369610e+01 -1.4763278291e+02 1.5028102025e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -2 1 1 2 0 503 +2.4351783173e+01 +1.1440707638e+01 +4.1015751290e+01 4.9052940675e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.27784960E+02 -0 - 1 4 0.25725808E-01 0.27784960E+02 - 1 -2 0.86002106E-02 0.27784960E+02 - 0.26447976E+03 - - - - 5 1 +6.6372287e+07 5.39590300e+01 7.54677100e-03 1.42508700e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +5.9800586950e+01 5.9800586950e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -7.5246990846e+01 7.5246990846e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 505 +1.7215966258e+01 -1.3481800279e+01 -1.3120982800e+01 2.5501149436e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 503 -4.4948096317e+01 +2.7284215874e+01 +3.3376063191e+01 6.2279381760e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 502 +2.7732130059e+01 -1.3802415596e+01 -3.5701484287e+01 4.7267046599e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.53959031E+02 -0 - 1 21 0.92000904E-02 0.53959031E+02 - 1 21 0.11576460E-01 0.53959031E+02 - 0.54280239E+06 - - - - 5 1 +6.6372287e+07 3.74932000e+01 7.54677100e-03 1.52751300e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +7.5616997299e+02 7.5616997299e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -9.5383624010e+00 9.5383624010e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +2.4198598086e+01 -2.8579386523e+01 +4.8877048754e+01 6.1573690634e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -4.1183674764e+01 +1.1831705659e+01 +1.5380528884e+02 1.5966261679e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 504 +1.6985076678e+01 +1.6747680864e+01 +5.4394927300e+02 5.4447202797e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.37493203E+02 -0 - 1 21 0.11633379E+00 0.37493203E+02 - 1 21 
0.14674410E-02 0.37493203E+02 - 0.11386933E+06 - - - - 5 1 +6.6372287e+07 4.04284600e+01 7.54677100e-03 1.50508200e-01 - 21 -1 0 0 502 503 +0.0000000000e+00 +0.0000000000e+00 +4.0351583823e+02 4.0351583823e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 4 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.3201441918e+01 1.3201441918e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 4 1 1 2 501 0 +4.2439120380e+01 +1.6559287496e+01 +4.1887418346e+01 6.1885739526e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 4 1 1 2 502 0 -2.1117804890e+01 +8.9613985453e+00 +2.4763202138e+02 2.4869235283e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -4 1 1 2 0 501 -2.1321315491e+01 -2.5520686042e+01 +1.0079495659e+02 1.0613918778e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.40428461E+02 -0 - 1 21 0.62079350E-01 0.40428461E+02 - 1 4 0.20309914E-02 0.40428461E+02 - 0.80181641E+04 - - - - 5 1 +6.6372287e+07 3.42643000e+01 7.54677100e-03 1.55523200e-01 - 1 -1 0 0 504 0 -0.0000000000e+00 +0.0000000000e+00 +3.3344584104e+03 3.3344584104e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -9.8032652357e-01 9.8032652357e-01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -1.6378085835e+01 +1.3734600814e+01 +2.6780940543e+02 2.6866104777e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -2.1482442393e+01 -1.5710524314e+01 +4.2265106582e+02 4.2348818088e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 +3.7860528228e+01 +1.9759235008e+00 +2.6430176126e+03 2.6432895082e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.34264300E+02 -0 - 1 21 0.15082213E-03 0.34264300E+02 - 1 1 0.51298452E+00 0.34264300E+02 - 0.33282672E+05 - - - - 5 1 +6.6372287e+07 1.19571300e+02 7.54677100e-03 1.24380600e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +8.7854154861e+01 8.7854154861e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 503 0 +0.0000000000e+00 -0.0000000000e+00 -1.7934232150e+03 1.7934232150e+03 
0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +1.7605310395e+01 +1.3701422770e+01 +2.6274785914e+01 3.4467960701e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 502 0 +1.1756210603e+02 +1.7577983299e+01 -1.7004020433e+03 1.7045518306e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 502 -1.3516741643e+02 -3.1279406068e+01 -3.1441802775e+01 1.4225757860e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.11957126E+03 -0 - 1 2 0.27591127E+00 0.11957126E+03 - 1 -1 0.13516023E-01 0.11957126E+03 - 0.43015636E+02 - - - - 5 1 +6.6372287e+07 2.58481600e+01 7.54677100e-03 1.64911000e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.4121175553e+01 1.4121175553e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -4.5285316425e+02 4.5285316425e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 505 +1.4043566251e+01 +1.9773590139e+01 -2.4347029469e+02 2.4467529711e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 -2.6394599884e+01 -1.3369434684e+00 -6.6055666935e-01 2.6436691472e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 +1.2351033633e+01 -1.8436646670e+01 -1.9460113733e+02 1.9586235121e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.25848162E+02 -0 - 1 21 0.21724885E-02 0.25848162E+02 - 1 21 0.69669719E-01 0.25848162E+02 - 0.18427689E+06 - - - - 5 1 +6.6372287e+07 5.64794400e+01 7.54677100e-03 1.41322900e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +3.7881487994e+00 3.7881487994e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 -1 0 0 502 0 -0.0000000000e+00 -0.0000000000e+00 -8.3293306677e+02 8.3293306677e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +2.2042587179e+01 +4.9935512809e+01 -3.4469253261e+02 3.4898764036e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 -1.0400442402e+01 -2.1391345840e+01 -1.5133248122e+02 1.5319033504e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 -1.1642144777e+01 
-2.8544166969e+01 -3.3311990414e+02 3.3454324016e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.56479436E+02 -0 - 1 21 0.58279180E-03 0.56479436E+02 - 1 1 0.12814362E+00 0.56479436E+02 - 0.18915759E+06 - - - - 5 1 +6.6372287e+07 1.20437600e+02 7.54677100e-03 1.24237700e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +7.6326880933e+01 7.6326880933e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -5.6424004213e+02 5.6424004213e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 -1.3358633684e+02 -3.2803021929e+01 -2.5791576009e+01 1.3995196687e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 +9.1619991808e+01 +2.3833933827e+01 -1.0682884494e+02 1.4273990827e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 1 1 2 501 0 +4.1966345031e+01 +8.9690881026e+00 -3.5529274025e+02 3.5787504793e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.12043761E+03 -0 - 1 21 0.11742597E-01 0.12043761E+03 - 1 1 0.86806160E-01 0.12043761E+03 - 0.26061044E+04 - - - - 5 1 +6.6372287e+07 5.37227900e+01 7.54677100e-03 1.42623700e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.3304377309e+02 1.3304377309e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -7.0963777946e+01 7.0963777946e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 -1.5841020033e+01 +3.7513476754e+01 -1.3490766536e+01 4.2897548130e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 -5.0497052458e+01 -2.8085691040e+00 +6.9558481825e+01 8.6001295113e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 +6.6338072490e+01 -3.4704907650e+01 +6.0122798510e+00 7.5108707789e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.53722789E+02 -0 - 1 21 0.20468273E-01 0.53722789E+02 - 1 21 0.10917504E-01 0.53722789E+02 - 0.14842924E+06 - - - - 5 1 +6.6372287e+07 4.63666500e+01 7.54677100e-03 1.46598600e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 
+0.0000000000e+00 +7.9404929403e+01 7.9404929403e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -4.0571876305e+01 4.0571876305e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +3.8096004013e+01 -2.0103472745e+01 +5.5898111257e+01 7.0569497520e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 -2.4864947573e+01 +3.4826673187e+00 -8.6196401551e+00 2.6546050287e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 502 -1.3231056440e+01 +1.6620805427e+01 -8.4454180038e+00 2.2861257901e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.46366646E+02 -0 - 1 21 0.12216143E-01 0.46366646E+02 - 1 21 0.62418271E-02 0.46366646E+02 - 0.93143569E+06 - - - - 5 1 +6.6372287e+07 3.23809700e+01 7.54677100e-03 1.57316900e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.7524908213e+01 2.7524908213e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -1.6414512109e+02 1.6414512109e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 505 +1.0735752809e+01 +2.3888282841e+01 -4.2028929504e+01 4.9521079963e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 +8.4348718406e+00 -4.8157198822e+01 -9.9473441559e+01 1.1083874971e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 -1.9170624649e+01 +2.4268915981e+01 +4.8821581875e+00 3.1310199631e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.32380969E+02 -0 - 1 21 0.42346013E-02 0.32380969E+02 - 1 21 0.25253095E-01 0.32380969E+02 - 0.46542943E+06 - - - - 5 1 +6.6372287e+07 3.04619800e+01 7.54677100e-03 1.59303800e-01 - 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +2.2711935090e+03 2.2711935090e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -2.7585117854e+00 2.7585117854e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +2.0648195639e+01 -1.1330743851e+01 +3.5163773314e+02 3.5242563628e+02 
0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -2.3394837542e+01 -2.0181653530e+01 +1.0496115767e+02 1.0941417724e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 +2.7466419032e+00 +3.1512397381e+01 +1.8118361064e+03 1.8121122073e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.30461979E+02 -0 - 1 21 0.42438824E-03 0.30461979E+02 - 1 2 0.34941289E+00 0.30461979E+02 - 0.91450792E+05 - - - - 5 1 +6.6372287e+07 5.05952900e+01 7.54677100e-03 1.44215700e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.7310225510e+01 1.7310225510e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 502 0 -0.0000000000e+00 -0.0000000000e+00 -1.2474989233e+03 1.2474989233e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 -2.0960667079e+01 -3.2621697190e+00 -1.3563546253e-01 2.1213432364e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -3.0621090753e+01 +1.1876026904e+01 -3.7308678078e+01 4.9705418959e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 +5.1581757832e+01 -8.6138571850e+00 -1.1927443843e+03 1.1938902975e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.50595294E+02 -0 - 1 21 0.26631115E-02 0.50595294E+02 - 1 2 0.19192292E+00 0.50595294E+02 - 0.17875597E+05 - - - - 5 1 +6.6372287e+07 5.35686700e+01 7.54677100e-03 1.42699200e-01 - 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +2.0775922111e+01 2.0775922111e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.9382658620e+02 1.9382658620e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 -2.0032585171e+01 +8.2484330843e+00 -1.7029580071e+01 2.7556264521e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 -3.0721524725e+01 +4.2318877201e+00 -1.1017363691e+01 3.2910534147e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 504 +5.0754109896e+01 -1.2480320804e+01 -1.4500372033e+02 1.5413570964e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.53568673E+02 -0 - 1 21 
0.31962958E-02 0.53568673E+02 - 1 21 0.29819474E-01 0.53568673E+02 - 0.56798716E+06 - - - - 5 1 +6.6372287e+07 3.85025300e+01 7.54677100e-03 1.51953000e-01 - 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +2.2289530809e+02 2.2289530809e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 3 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.1799706113e+01 1.1799706113e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +2.1037505294e+01 +3.1358606708e+01 +8.1763268338e+01 9.0062039134e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 +1.7522488983e+01 -1.3904294537e+01 +4.7427977092e+01 5.2438345109e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 3 1 1 2 501 0 -3.8559994278e+01 -1.7454312171e+01 +8.1904356543e+01 9.2194629956e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.38502528E+02 -0 - 1 21 0.34291583E-01 0.38502528E+02 - 1 3 0.18153396E-02 0.38502528E+02 - 0.34255031E+05 - - - - 5 1 +6.6372287e+07 3.75647200e+01 7.54677100e-03 1.52693700e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +5.1007388093e+00 5.1007388093e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.1898429651e+03 1.1898429651e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -2.3577055731e+01 +8.6164758199e+00 -5.6840562318e+02 5.6895964152e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -1.7661209766e+01 -3.2338872055e+01 -8.6569325177e+01 9.4084903347e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 504 +4.1238265497e+01 +2.3722396235e+01 -5.2976727797e+02 5.3189915908e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.37564724E+02 -0 - 1 21 0.78472893E-03 0.37564724E+02 - 1 21 0.18305279E+00 0.37564724E+02 - 0.98499507E+05 - - - - 5 1 +6.6372287e+07 1.43855200e+02 7.54677100e-03 1.20823400e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.7613958321e+02 1.7613958321e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 503 -0.0000000000e+00 
-0.0000000000e+00 -2.5721546644e+02 2.5721546644e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +1.8697441469e+01 +4.6743956885e+01 +6.3444037986e+01 8.0992208136e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 1 1 2 501 0 +1.5605198954e+01 -1.3052773015e+02 -2.0496954320e+02 2.4350261644e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -1 1 1 2 0 504 -3.4302640423e+01 +8.3783773266e+01 +6.0449621991e+01 1.0886022507e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.14385516E+03 -0 - 1 21 0.27098398E-01 0.14385516E+03 - 1 21 0.39571610E-01 0.14385516E+03 - 0.81522626E+04 - - - - 5 1 +6.6372287e+07 2.99475600e+01 7.54677100e-03 1.59866900e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.1735167452e+02 1.1735167452e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -6.7731005686e+01 6.7731005686e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -3.0037339775e+01 +1.9906567203e+01 +7.7442255748e+01 8.5415549954e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 505 +3.9581584148e+00 -2.3241579138e+01 -5.6222098856e+01 6.0965255831e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 +2.6079181360e+01 +3.3350119355e+00 +2.8400511938e+01 3.8701874417e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.29947557E+02 -0 - 1 21 0.18054104E-01 0.29947557E+02 - 1 21 0.10420155E-01 0.29947557E+02 - 0.19218372E+06 - - - - 5 1 +6.6372287e+07 5.51267100e+01 7.54677100e-03 1.41950100e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +3.3605621517e+02 3.3605621517e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.6748192173e+02 1.6748192173e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -3.6502084271e+01 +1.2779288039e+01 +2.6845966100e+00 3.8767504664e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 505 +5.6330187720e+01 -2.0382049136e+01 -1.4305805077e+02 1.5509391950e+02 0.0000000000e+00 
0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 -1.9828103448e+01 +7.6027610964e+00 +3.0894774760e+02 3.0967671273e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.55126711E+02 -0 - 1 21 0.51700956E-01 0.55126711E+02 - 1 21 0.25766450E-01 0.55126711E+02 - 0.55367291E+04 - - - - 5 1 +6.6372287e+07 2.38060800e+01 7.54677100e-03 1.67876700e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +5.6292151055e+01 5.6292151055e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -1.0100350383e+02 1.0100350383e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 -1.7078783867e+01 +1.3980353782e+01 +4.3115131723e+01 4.8436037552e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 +1.9550036137e+01 +1.1807248332e+01 -6.8557268747e+00 2.3845670826e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 -2.4712522707e+00 -2.5787602114e+01 -8.0970757619e+01 8.5013946502e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.23806083E+02 -0 - 1 21 0.86603311E-02 0.23806083E+02 - 1 21 0.15539000E-01 0.23806083E+02 - 0.32926995E+06 - - - - 5 1 +6.6372287e+07 4.37816800e+01 7.54677100e-03 1.48209400e-01 - -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +7.6069687389e+01 7.6069687389e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -6.3690305890e+01 6.3690305890e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -3.1851837126e+01 -2.0905814741e+01 -1.0380698209e+01 3.9488625117e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 +5.2242018124e+01 +1.1087919075e+01 -1.5683784728e+01 5.5661041227e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -2 1 1 2 0 503 -2.0390180998e+01 +9.8178956656e+00 +3.8443864437e+01 4.4610326936e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.43781676E+02 -0 - 1 21 0.97985087E-02 0.43781676E+02 - 1 -2 0.11703029E-01 0.43781676E+02 - 0.29443686E+05 - - - - 5 1 +6.6372287e+07 6.86898000e+01 7.54677100e-03 
1.36460600e-01 - 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +2.1667938332e+02 2.1667938332e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.5467804492e+02 1.5467804492e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 505 -1.1038053112e+01 +2.3433804553e+01 -2.7774504919e+01 3.7979006514e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 -1.5088204651e+01 +3.2631253632e+01 -1.1446451215e+02 1.1997740276e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 +2.6126257764e+01 -5.6065058184e+01 +2.0424035546e+02 2.1340101896e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.68689805E+02 -0 - 1 21 0.33335290E-01 0.68689805E+02 - 1 21 0.23796622E-01 0.68689805E+02 - 0.15057095E+05 - - - - 5 1 +6.6372287e+07 5.62723200e+01 7.54677100e-03 1.41417600e-01 - -2 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.2380787192e+02 1.2380787192e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -2.0327409785e+03 2.0327409785e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +2.3729827285e+01 -2.7343988978e+01 -1.7426108947e+03 1.7429869560e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -5.3672579255e+00 -2.5936900616e+01 -2.8286809267e+02 2.8410541727e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -2 1 1 2 0 502 -1.8362569359e+01 +5.3280889594e+01 +1.1654588079e+02 1.2945647716e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.56272325E+02 -0 - 1 21 0.31272939E+00 0.56272325E+02 - 1 -2 0.19047365E-01 0.56272325E+02 - 0.69988168E+01 - - - - 5 1 +6.6372287e+07 3.06264600e+01 7.54677100e-03 1.59126500e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +7.3143843443e+00 7.3143843443e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -2.8991776011e+02 2.8991776011e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -2.7176763448e+01 -5.9122717629e+00 
-8.2589250105e+01 8.7146518358e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +3.7114675978e+00 -2.1312114619e+01 -3.4556109190e+01 4.0768933069e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 504 +2.3465295850e+01 +2.7224386382e+01 -1.6545801647e+02 1.6931669302e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.30626458E+02 -0 - 1 21 0.11252898E-02 0.30626458E+02 - 1 21 0.44602736E-01 0.30626458E+02 - 0.12571056E+07 - - - - 5 1 +6.6372287e+07 2.39754200e+01 7.54677100e-03 1.67617000e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +3.1086457142e+00 3.1086457142e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -1.1096565876e+03 1.1096565876e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +1.8074701851e+01 -1.2471200900e+01 -4.7301063874e+01 5.2149940964e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -1.5797907864e+01 -1.5878819740e+01 -2.9957929976e+02 3.0041549170e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -2.2767939866e+00 +2.8350020640e+01 -7.5966757822e+02 7.6019980061e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.23975415E+02 -0 - 1 21 0.47825339E-03 0.23975415E+02 - 1 2 0.17071633E+00 0.23975415E+02 - 0.26724670E+06 - - - - 5 1 +6.6372287e+07 2.53344800e+01 7.54677100e-03 1.65624400e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +5.1570098369e+02 5.1570098369e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -1 -1 0 0 0 501 -0.0000000000e+00 -0.0000000000e+00 -1.8445706221e+02 1.8445706221e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 +2.4207533199e+01 -7.3828354628e+00 +5.0432335498e+02 5.0495797578e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 +8.1618412819e+00 -2.1011090789e+01 -6.9595529015e+00 2.3590611815e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 504 -3.2369374481e+01 +2.8393926251e+01 -1.6611988060e+02 1.7160945831e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 
0.25334477E+02 -0 - 1 21 0.79338612E-01 0.25334477E+02 - 1 -1 0.28378010E-01 0.25334477E+02 - 0.16118883E+03 - - - - 5 1 +6.6372287e+07 4.96978700e+01 7.54677100e-03 1.44697800e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +4.1669869678e+01 4.1669869678e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -7.2169785693e+02 7.2169785693e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -1.0137451528e+01 -1.9749989160e+01 -4.1326305480e+02 4.1385889197e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 +7.1118147579e-01 -2.6001039854e+01 -2.9419229654e+02 2.9533991805e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -1 1 1 2 0 504 +9.4262700527e+00 +4.5751029015e+01 +2.7427364092e+01 5.4168916585e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.49697867E+02 -0 - 1 21 0.11103045E+00 0.49697867E+02 - 1 -1 0.64107485E-02 0.49697867E+02 - 0.70621395E+03 - - - - 5 1 +6.6372287e+07 4.01879700e+01 7.54677100e-03 1.50683300e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +7.1802510669e+01 7.1802510669e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -7.1337629671e+01 7.1337629671e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +5.8213069507e+00 +5.5036070403e+01 -5.3681175091e+00 5.5602817785e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 505 -7.7116246928e+00 -3.4472047188e+01 -2.9891185989e+01 4.6273904012e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 +1.8903177421e+00 -2.0564023215e+01 +3.5724184496e+01 4.1263418543e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.40187974E+02 -0 - 1 21 0.11046540E-01 0.40187974E+02 - 1 21 0.10975020E-01 0.40187974E+02 - 0.42308800E+06 - - - - 5 1 +6.6372287e+07 2.09126700e+01 7.54677100e-03 1.72776500e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.1568669659e+02 2.1568669659e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 
-0.0000000000e+00 -0.0000000000e+00 -4.4598327407e+01 4.4598327407e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +1.0489102981e+01 +2.1031084145e+01 +3.3096570840e+00 2.3733554552e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 505 -2.0964401963e+01 -3.6307327249e+00 +1.9854727555e+02 1.9968402289e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 502 +1.0475298982e+01 -1.7400351420e+01 -3.0768563453e+01 3.6867446552e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.20912670E+02 -0 - 1 21 0.33182568E-01 0.20912670E+02 - 1 21 0.68612813E-02 0.20912670E+02 - 0.12384532E+06 - - - - 5 1 +6.6372287e+07 2.76487000e+01 7.54677100e-03 1.62563500e-01 - 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +8.8315219304e+00 8.8315219304e+00 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -8.5548998670e+02 8.5548998670e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -2.6108367324e+01 +2.7834397992e+01 -5.0511958607e+01 6.3307649761e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 3 1 1 2 501 0 -1.5502416378e+00 -2.7227504772e+01 -8.3708945045e+01 8.8039353392e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -3 1 1 2 0 503 +2.7658608962e+01 -6.0689322000e-01 -7.1243756111e+02 7.1297450547e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.27648700E+02 -0 - 1 21 0.13586955E-02 0.27648700E+02 - 1 21 0.13161386E+00 0.27648700E+02 - 0.94752714E+05 - - - - 5 1 +6.6372287e+07 3.42335000e+01 7.54677100e-03 1.55551500e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +5.5344269194e+01 5.5344269194e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -5.5616791251e+02 5.5616791251e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 502 -5.3992539344e+01 -1.9093964990e+01 -4.2937502340e+02 4.3317742845e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 501 +2.0534474104e+01 +1.3341922147e+01 -1.1811465484e+02 1.2062646145e+02 
0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 503 +3.3458065241e+01 +5.7520428423e+00 +4.6666034926e+01 5.7708291798e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.34233500E+02 -0 - 1 21 0.85564297E-01 0.34233500E+02 - 1 -1 0.85145026E-02 0.34233500E+02 - 0.83709286E+03 - - - - 5 1 +6.6372287e+07 3.19061800e+01 7.54677100e-03 1.57792600e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +3.0941997221e+01 3.0941997221e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.4106991043e+02 1.4106991043e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -7.0637436764e+00 -2.8412231374e+01 -9.0188811038e+01 9.4821796033e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +1.2887679806e+01 +4.9262952225e+01 -2.1945566891e+01 5.5448522603e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 503 -5.8239361293e+00 -2.0850720851e+01 +2.0064647242e+00 2.1741589011e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.31906182E+02 -0 - 1 21 0.47603074E-02 0.31906182E+02 - 1 21 0.21703063E-01 0.31906182E+02 - 0.50542998E+06 - - - - 5 1 +6.6372287e+07 3.48184100e+01 7.54677100e-03 1.55021900e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +2.1589074046e+03 2.1589074046e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.5961454167e+01 1.5961454167e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 +2.0024453480e+01 +1.8334123955e+01 +1.9389015022e+03 1.9390915796e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 -3.0260374134e+01 +1.8529083324e+01 +8.4480836704e+00 3.6474474489e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 +1.0235920654e+01 -3.6863207279e+01 +1.9559636455e+02 1.9930280466e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.34818413E+02 -0 - 1 21 0.33213937E+00 0.34818413E+02 - 1 21 0.24556100E-02 0.34818413E+02 - 0.27565033E+04 - - - - 5 1 +6.6372287e+07 3.59920600e+01 
7.54677100e-03 1.53996300e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +9.7186356638e+01 9.7186356638e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -2.5874356624e+02 2.5874356624e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -1.8438172649e+01 -2.1235714101e+01 -8.7075130972e+01 9.1504099350e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 +5.0238810426e+01 +4.6817397829e+00 -1.6209235767e+02 1.6976392189e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 504 -3.1800637777e+01 +1.6553974318e+01 +8.7610279039e+01 9.4661901640e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.35992060E+02 -0 - 1 21 0.14951747E-01 0.35992060E+02 - 1 21 0.39806702E-01 0.35992060E+02 - 0.24770639E+05 - - - - 5 1 +6.6372287e+07 4.43353000e+01 7.54677100e-03 1.47853400e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +4.1509889175e+01 4.1509889175e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.5021838758e+02 1.5021838758e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +2.1728377512e+01 -4.5038835640e+00 +4.7828765006e+00 2.2699851631e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 +3.7288465851e+01 +1.8488032542e+01 -9.7938315943e+01 1.0641499313e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 502 -5.9016843363e+01 -1.3984148978e+01 -1.5553058964e+01 6.2613431996e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.44335296E+02 -0 - 1 21 0.63861368E-02 0.44335296E+02 - 1 21 0.23110521E-01 0.44335296E+02 - 0.28976826E+06 - - - - 5 1 +6.6372287e+07 8.95697900e+01 7.54677100e-03 1.30389700e-01 - -1 -1 0 0 0 502 -0.0000000000e+00 +0.0000000000e+00 +1.0959072147e+02 1.0959072147e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -4 -1 0 0 0 501 +0.0000000000e+00 -0.0000000000e+00 -2.3941333748e+02 2.3941333748e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +4.4433031586e+01 
-8.6837769029e+00 +3.1109742872e+01 5.4931943155e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -4 1 1 2 0 502 -7.9188610214e+01 +3.3845172129e+01 -2.1114031298e+02 2.2802754971e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 503 +3.4755578628e+01 -2.5161395226e+01 +5.0207954089e+01 6.6044566085e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.89569794E+02 -0 - 1 -4 0.36832821E-01 0.89569794E+02 - 1 -1 0.16860111E-01 0.89569794E+02 - 0.65999041E+02 - - - - 5 1 +6.6372287e+07 3.30531100e+01 7.54677100e-03 1.56660200e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +7.4577216351e+00 7.4577216351e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 -1 0 0 504 0 -0.0000000000e+00 -0.0000000000e+00 -2.9293771215e+03 2.9293771215e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -8.0432547113e+00 +3.1734139686e+01 -3.2819056421e+01 4.6355582537e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 -6.2607331878e+00 +2.0255158476e+01 -2.7252772679e+02 2.7335111139e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 +1.4303987899e+01 -5.1989298162e+01 -2.6165726167e+03 2.6171281493e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.33053112E+02 -0 - 1 21 0.11473423E-02 0.33053112E+02 - 1 2 0.45067321E+00 0.33053112E+02 - 0.95448503E+04 - - - - 5 1 +6.6372287e+07 2.64822300e+01 7.54677100e-03 1.64058200e-01 - 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +2.0433495267e+02 2.0433495267e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -1.1621962120e+01 1.1621962120e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -6.1453446806e-01 +2.8655724578e+01 +2.6926488695e+01 3.9326377880e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 +1.9294771642e+01 -8.5428375362e+00 +2.2933975834e+01 3.1164651987e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 -1.8680237174e+01 -2.0112887042e+01 +1.4285252602e+02 1.4546588493e+02 0.0000000000e+00 0.0000e+00 
-1.0000e+00 - - 3 0.26482232E+02 -0 - 1 21 0.17879943E-02 0.26482232E+02 - 1 2 0.31436144E-01 0.26482232E+02 - 0.22563568E+06 - - - - 5 1 +6.6372287e+07 2.94114000e+01 7.54677100e-03 1.60468700e-01 - 1 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +7.1456949129e+01 7.1456949129e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 -1 0 0 502 0 +0.0000000000e+00 -0.0000000000e+00 -3.2341513368e+01 3.2341513368e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -1.2633702387e+01 +1.6779116547e+01 +3.5382422326e+01 4.1146871057e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 +3.4850371802e+01 -4.6645490224e-01 +1.2205098572e-01 3.4853706995e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 502 0 -2.2216669414e+01 -1.6312661644e+01 +3.6109624495e+00 2.7797884444e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.29411403E+02 -0 - 1 2 0.49756174E-02 0.29411403E+02 - 1 1 0.10993377E-01 0.29411403E+02 - 0.84192782E+04 - - - - 5 1 +6.6372287e+07 4.12907800e+01 7.54677100e-03 1.49892200e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +1.6408792035e+01 1.6408792035e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -2.8426721197e+02 2.8426721197e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -9.0227533210e+00 +4.0120314210e+01 -1.6777925940e+02 1.7274527367e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 -2.8940986847e+01 -8.3287328478e+00 -3.5522385380e+01 4.6570252025e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 +3.7963740168e+01 -3.1791581362e+01 -6.4556775153e+01 8.1360478312e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.41290780E+02 -0 - 1 21 0.25244298E-02 0.41290780E+02 - 1 21 0.43733413E-01 0.41290780E+02 - 0.38789728E+06 - - - - 5 1 +6.6372287e+07 3.01075800e+01 7.54677100e-03 1.59690300e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.0837614737e+02 1.0837614737e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 -1 0 0 503 0 
+0.0000000000e+00 -0.0000000000e+00 -5.0187655412e+02 5.0187655412e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -2.4244911852e+01 +2.4839805338e+00 -4.4874897660e+01 5.1066058689e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 1 1 1 2 502 0 +9.4171551279e+00 +2.3796264682e+01 -4.5137403450e+02 4.5209895382e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -1 1 1 2 0 502 +1.4827756724e+01 -2.6280245215e+01 +1.0274852541e+02 1.0708768898e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.30107583E+02 -0 - 1 1 0.77211778E-01 0.30107583E+02 - 1 -1 0.16673253E-01 0.30107583E+02 - 0.11578566E+03 - - - - 5 1 +6.6372287e+07 2.75831300e+01 7.54677100e-03 1.62645100e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +1.5119270639e+01 1.5119270639e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 504 503 -0.0000000000e+00 -0.0000000000e+00 -4.4797372094e+02 4.4797372094e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +2.3933439080e+01 -1.5349180970e+01 -7.6519214979e+00 2.9444163518e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 -9.1468995899e+00 +2.8848403826e+01 -5.3965016364e+01 6.1871796213e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 503 -1.4786539490e+01 -1.3499222856e+01 -3.7123751244e+02 3.7177703185e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.27583132E+02 -0 - 1 21 0.23260418E-02 0.27583132E+02 - 1 21 0.68919030E-01 0.27583132E+02 - 0.17059051E+06 - - - - 5 1 +6.6372287e+07 5.38232700e+01 7.54677100e-03 1.42574700e-01 - 21 -1 0 0 502 501 +0.0000000000e+00 +0.0000000000e+00 +8.6931327192e+02 8.6931327192e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.3878826862e+01 1.3878826862e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -1.1384767832e+01 -5.5414417532e+01 +5.7810173998e+02 5.8086314427e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 503 +4.8586431572e+01 +3.3136350907e+01 +2.3303180192e+02 2.4033826119e+02 
0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 504 -3.7201663740e+01 +2.2278066625e+01 +4.4300903156e+01 6.1990693318e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.53823273E+02 -0 - 1 21 0.13374046E+00 0.53823273E+02 - 1 21 0.21352048E-02 0.53823273E+02 - 0.45957317E+05 - - - - 5 1 +6.6372287e+07 4.09907500e+01 7.54677100e-03 1.50104500e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +7.0992522075e+01 7.0992522075e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.3949703176e+02 1.3949703176e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +1.1659851347e+01 -5.1077643128e+01 +4.3516059870e+01 6.8106719401e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +3.0482205563e+00 +2.8158917183e+01 -1.2900096167e+00 2.8352784524e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 503 -1.4708071904e+01 +2.2918725946e+01 -1.1073055994e+02 1.1403004991e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.40990751E+02 -0 - 1 21 0.10921927E-01 0.40990751E+02 - 1 21 0.21461082E-01 0.40990751E+02 - 0.13404950E+06 - - - - 5 1 +6.6372287e+07 3.36149800e+01 7.54677100e-03 1.56125500e-01 - 21 -1 0 0 502 503 +0.0000000000e+00 +0.0000000000e+00 +7.9720643936e+01 7.9720643936e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -1.1015809106e+02 1.1015809106e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 +2.0832938167e+01 +2.8798086026e+01 -8.7908167480e+01 9.4821869741e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 4 1 1 2 502 0 -3.2596791056e+01 +4.0076403627e+00 +4.9338106023e+01 5.9269390704e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - -4 1 1 2 0 501 +1.1763852889e+01 -3.2805726389e+01 +8.1326143350e+00 3.5787474550e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.33614975E+02 -0 - 1 21 0.12264714E-01 0.33614975E+02 - 1 1 0.16947399E-01 0.33614975E+02 - 0.16891392E+05 - - - - 5 1 +6.6372287e+07 5.25551000e+01 7.54677100e-03 
1.43202800e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +4.5821819927e+02 4.5821819927e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -1.2383942127e+01 1.2383942127e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -5.0926951541e+01 -5.0815690566e+00 +5.0883321493e+01 7.2169863125e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 +2.4440122995e+01 +3.5331599133e+00 +1.4625618020e+02 1.4832623867e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 504 +2.6486828546e+01 +1.5484091433e+00 +2.4869475544e+02 2.5010603960e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.52555102E+02 -0 - 1 21 0.19052222E-02 0.52555102E+02 - 1 -1 0.70495095E-01 0.52555102E+02 - 0.23587364E+05 - - - - 5 1 +6.6372287e+07 4.46278200e+01 7.54677100e-03 1.47667900e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +5.5593250322e+02 5.5593250322e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 505 502 -0.0000000000e+00 -0.0000000000e+00 -1.8836808285e+02 1.8836808285e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +4.0713374762e+01 +1.7975906273e+01 +5.4492854609e+02 5.4674293085e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 -4.8243013951e+01 -3.8385710795e+01 -1.0265016479e+02 1.1974100183e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 501 +7.5296391891e+00 +2.0409804522e+01 -7.4713960932e+01 7.7816653392e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.44627825E+02 -0 - 1 21 0.85528077E-01 0.44627825E+02 - 1 21 0.28979705E-01 0.44627825E+02 - 0.15745256E+04 - - - - 5 1 +6.6372287e+07 3.37382900e+01 7.54677100e-03 1.56009900e-01 - 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +1.5881756914e+03 1.5881756914e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 +0.0000000000e+00 -0.0000000000e+00 -1.3626711614e+01 1.3626711614e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 -2.5839259073e+01 -9.7303516134e+00 
+1.6154599506e+02 1.6388854618e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 501 -1.7069497306e+01 +2.9008026152e+01 +1.1420766537e+01 3.5542470761e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 2 1 1 2 501 0 +4.2908756379e+01 -1.9277674538e+01 +1.4015822182e+03 1.4023713861e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.33738290E+02 -0 - 1 21 0.20964184E-02 0.33738290E+02 - 1 2 0.24433458E+00 0.33738290E+02 - 0.17276941E+05 - - - - 5 1 +6.6372287e+07 3.21182600e+01 7.54677100e-03 1.57578900e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +4.7751679366e+02 4.7751679366e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -2.2525410541e+01 2.2525410541e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -1.0544572028e+01 +2.8771952473e+01 +1.5170782840e+02 1.5477169782e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +2.6106310540e+01 +5.1888225486e+00 -1.0981232855e+01 2.8793242340e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 504 -1.5561738512e+01 -3.3960775022e+01 +3.1426478757e+02 3.1647726404e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.32118265E+02 -0 - 1 21 0.73464115E-01 0.32118265E+02 - 1 21 0.34654481E-02 0.32118265E+02 - 0.80826114E+05 - - - - 5 1 +6.6372287e+07 4.09504400e+01 7.54677100e-03 1.50133100e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +1.7536299147e+02 1.7536299147e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -3.6964015600e+01 3.6964015600e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +4.0714726769e+00 +5.7850454719e+01 +1.0890596398e+00 5.8003776186e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +4.7732900088e+00 -1.9654732976e+01 +1.3161726099e+01 2.4131387440e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 504 -8.8447626857e+00 -3.8195721743e+01 +1.2414819013e+02 1.3019184344e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 
- - 3 0.40950439E+02 -0 - 1 21 0.26978921E-01 0.40950439E+02 - 1 21 0.56867717E-02 0.40950439E+02 - 0.26246574E+06 - - - - 5 1 +6.6372287e+07 2.73967700e+01 7.54677100e-03 1.62878500e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +9.4621731867e+02 9.4621731867e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -5.2344682715e+02 5.2344682715e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 +3.5097651064e+01 +5.1743549524e+00 +5.7624905467e+01 6.7670146960e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 -1.6769759782e+01 -2.1740470328e+01 +8.8306196865e+02 8.8348871717e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 502 -1.8327891283e+01 +1.6566115375e+01 -5.1791638260e+02 5.1850528169e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.27396771E+02 -0 - 1 21 0.14557190E+00 0.27396771E+02 - 1 21 0.80530281E-01 0.27396771E+02 - 0.67303146E+02 - - - - 5 1 +6.6372287e+07 8.17190300e+01 7.54677100e-03 1.32424300e-01 - 1 -1 0 0 502 0 +0.0000000000e+00 +0.0000000000e+00 +3.6186655864e+02 3.6186655864e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 -1 0 0 503 0 -0.0000000000e+00 -0.0000000000e+00 -8.5536214190e+01 8.5536214190e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +6.9748665717e+01 -2.1225395602e+01 +1.1519982450e+02 1.3633192345e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 501 0 +1.4237054539e+01 +2.7062837406e+01 +2.1101094416e+02 2.1321517170e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 1 1 1 2 502 0 -8.3985720256e+01 -5.8374418043e+00 -4.9880424207e+01 9.7855677673e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.81719032E+02 -0 - 1 1 0.55671777E-01 0.81719032E+02 - 1 1 0.13159418E-01 0.81719032E+02 - 0.34676915E+03 - - - - 5 1 +6.6372287e+07 3.21808800e+01 7.54677100e-03 1.57516200e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.6785736656e+03 1.6785736656e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 
+0.0000000000e+00 -0.0000000000e+00 -2.6055706089e+00 2.6055706089e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 501 -1.3779710329e+01 -3.5600343603e+01 +9.7923046116e+02 9.7997426545e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 +2.3134215327e+01 +6.9265799703e+00 +7.6205484473e+01 7.9940260774e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -1 1 1 2 0 504 -9.3545049987e+00 +2.8673763633e+01 +6.2053214934e+02 6.2126470997e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.32180882E+02 -0 - 1 21 0.40085836E-03 0.32180882E+02 - 1 -1 0.25824124E+00 0.32180882E+02 - 0.94656396E+04 - - - - 5 1 +6.6372287e+07 2.72410900e+01 7.54677100e-03 1.63075300e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +4.0924620761e+01 4.0924620761e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 504 502 -0.0000000000e+00 -0.0000000000e+00 -1.0688357873e+02 1.0688357873e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 502 +1.4260677542e+01 +2.3044763190e+01 +2.6464656953e+01 3.7878834487e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 -3.4418262802e+01 -1.1596463544e+01 -5.7672141314e+01 6.8155488882e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 +2.0157585259e+01 -1.1448299646e+01 -3.4751473609e+01 4.1773876122e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.27241089E+02 -0 - 1 21 0.62960956E-02 0.27241089E+02 - 1 21 0.16443627E-01 0.27241089E+02 - 0.51232088E+06 - - - - 5 1 +6.6372287e+07 8.96432400e+01 7.54677100e-03 1.30371800e-01 - 21 -1 0 0 503 501 +0.0000000000e+00 +0.0000000000e+00 +1.9106265508e+03 1.9106265508e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -8.3144018617e+00 8.3144018617e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 +2.0112949282e+01 -5.3085600688e+01 +9.4969449400e+02 9.5138963820e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 505 -4.7004645833e+00 -3.1846193563e+01 +6.9718679537e+02 
6.9792958244e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 502 -1.5412484699e+01 +8.4931794252e+01 +2.5543085956e+02 2.6962173201e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.89643239E+02 -0 - 1 21 0.29394220E+00 0.89643239E+02 - 1 21 0.12791403E-02 0.89643239E+02 - 0.10444343E+05 - - - - 5 1 +6.6372287e+07 3.51788400e+01 7.54677100e-03 1.54701800e-01 - 21 -1 0 0 504 502 +0.0000000000e+00 +0.0000000000e+00 +2.4477402762e+01 2.4477402762e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - -4 -1 0 0 0 501 -0.0000000000e+00 -0.0000000000e+00 -1.3548028651e+02 1.3548028651e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 +5.2535145793e+00 -1.9356004202e+01 -7.2383362573e+01 7.5110621695e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 503 -1.7320229400e+01 -2.4467895105e+01 -1.3062058210e+01 3.2699932753e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - -4 1 1 2 0 502 +1.2066714821e+01 +4.3823899307e+01 -2.5557462961e+01 5.2147134821e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.35178836E+02 -0 - 1 21 0.37657543E-02 0.35178836E+02 - 1 -4 0.20843121E-01 0.35178836E+02 - 0.26617371E+05 - - - - 5 1 +6.6372287e+07 5.53124500e+01 7.54677100e-03 1.41862700e-01 - 21 -1 0 0 504 501 +0.0000000000e+00 +0.0000000000e+00 +2.4660412160e+02 2.4660412160e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 503 502 -0.0000000000e+00 -0.0000000000e+00 -7.8448765078e+02 7.8448765078e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 505 -4.2504266063e+01 -2.5589506438e+01 -2.1846702105e+02 2.2402962920e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 +6.8994906150e+01 -2.2979517961e+01 -5.5770982325e+02 5.6243097556e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 505 501 -2.6490640086e+01 +4.8569024399e+01 +2.3829331512e+02 2.4463116762e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.55312446E+02 -0 - 1 21 0.37939096E-01 0.55312446E+02 - 1 21 0.12069041E+00 0.55312446E+02 - 0.42114463E+03 - - - - 5 1 +6.6372287e+07 
3.03757500e+01 7.54677100e-03 1.59397200e-01 - 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +1.2971462251e+03 1.2971462251e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -5.6448232432e+00 5.6448232432e+00 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 -2.3055791365e+00 +2.0675048888e+01 +2.1686626098e+02 2.1786176464e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -2.9371916466e+01 -8.2619650311e+00 +4.2957332697e+01 5.2690625120e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 501 0 +3.1677495603e+01 -1.2413083857e+01 +1.0316778082e+03 1.0322386586e+03 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.30375748E+02 -0 - 1 21 0.86843538E-03 0.30375748E+02 - 1 2 0.19956072E+00 0.30375748E+02 - 0.91540288E+05 - - - - 5 1 +6.6372287e+07 4.60134200e+01 7.54677100e-03 1.46811200e-01 - -1 -1 0 0 0 501 -0.0000000000e+00 +0.0000000000e+00 +1.7829784629e+01 1.7829784629e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 503 502 +0.0000000000e+00 -0.0000000000e+00 -2.8514522110e+02 2.8514522110e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 501 -3.0860485485e+01 +3.4588417908e+01 -6.7479109992e+01 8.1866711813e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 504 502 +2.6117598747e+01 +1.5550519447e+01 -1.3824680405e+02 1.4154902490e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -1 1 1 2 0 504 +4.7428867379e+00 -5.0138937355e+01 -6.1589522426e+01 7.9559269015e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.46013420E+02 -0 - 1 21 0.43868495E-01 0.46013420E+02 - 1 -1 0.27430438E-02 0.46013420E+02 - 0.16895399E+05 - - -
From 0e4c680606971eb0e500646afce2e76efa94c181 Mon Sep 17 00:00:00 2001 From: Zenny Jovi Joestar Wettersten Date: Wed, 31 Jul 2024 11:53:22 +0200 Subject: [PATCH 17/76] new makefiles and export routines for rwgt_runner/driver --- .../template_files/gpu/cudacpp_driver.mk | 1125 ++++++++++++++++ .../template_files/gpu/cudacpp_runner.mk | 1128 +++++++++++++++++ .../iolibs/template_files/gpu/cudacpp_src.mk | 3 + .../CUDACPP_SA_OUTPUT/model_handling.py | 41 +- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 85 +- 5 files changed, 2367 insertions(+), 15 deletions(-) create mode 100644 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk create mode 100644 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk new file mode 100644 index 0000000000..867da1fdb1 --- /dev/null +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk @@ -0,0 +1,1125 @@ +# Copyright (C) 2020-2024 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. +# Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. 
+ +#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') + +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk + +#------------------------------------------------------------------------------- + +#=== Include cudacpp_config.mk + +# Check that the user-defined choices of BACKEND, FPTYPE, HELINL, HRDCOD are supported (and configure defaults if no user-defined choices exist) +# Stop with an error if BACKEND=cuda and nvcc is missing or if BACKEND=hip and hipcc is missing. +# Determine CUDACPP_BUILDDIR from a DIRTAG based on BACKEND, FPTYPE, HELINL, HRDCOD and from the user-defined choice of USEBUILDDIR +include ../src/cudacpp_config.mk + +# Export CUDACPP_BUILDDIR (so that there is no need to check/define it again in cudacpp_src.mk) +export CUDACPP_BUILDDIR + +#------------------------------------------------------------------------------- + +#=== Use bash in the Makefile (https://www.gnu.org/software/make/manual/html_node/Choosing-the-Shell.html) + +SHELL := /bin/bash + +#------------------------------------------------------------------------------- + +#=== Detect O/S and architecture (assuming uname is available, https://en.wikipedia.org/wiki/Uname) + +# Detect O/S kernel (Linux, Darwin...) +UNAME_S := $(shell uname -s) +###$(info UNAME_S='$(UNAME_S)') + +# Detect architecture (x86_64, ppc64le...) 
+UNAME_P := $(shell uname -p) +###$(info UNAME_P='$(UNAME_P)') + +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: including make_opts is crucial for MG5aMC flag consistency/documentation +# AV: disable the inclusion of make_opts if the file has not been generated (standalone cudacpp) +ifneq ($(wildcard ../Source/make_opts),) + include ../Source/make_opts +endif + +#------------------------------------------------------------------------------- + +#=== Redefine BACKEND if the current value is 'cppauto' + +# Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) +ifeq ($(BACKEND),cppauto) + ifeq ($(UNAME_P),ppc64le) + override BACKEND = cppsse4 + else ifeq ($(UNAME_P),arm) + override BACKEND = cppsse4 + else ifeq ($(wildcard /proc/cpuinfo),) + override BACKEND = cppnone + ###$(warning Using BACKEND='$(BACKEND)' because host SIMD features cannot be read from /proc/cpuinfo) + else ifeq ($(shell grep -m1 -c avx512vl /proc/cpuinfo)$(shell $(CXX) --version | grep ^clang),1) + override BACKEND = cpp512y + else + override BACKEND = cppavx2 + ###ifneq ($(shell grep -m1 -c avx512vl /proc/cpuinfo),1) + ### $(warning Using BACKEND='$(BACKEND)' because host does not support avx512vl) + ###else + ### $(warning Using BACKEND='$(BACKEND)' because this is faster than avx512vl for clang) + ###endif + endif + $(info BACKEND=$(BACKEND) (was cppauto)) +else + $(info BACKEND='$(BACKEND)') +endif + +#------------------------------------------------------------------------------- + +#=== Configure the C++ compiler + +CXXFLAGS = $(OPTFLAGS) -std=c++17 -Wall -Wshadow -Wextra +ifeq ($(shell $(CXX) --version | grep ^nvc++),) + CXXFLAGS += -ffast-math # see issue #117 +endif +###CXXFLAGS+= -Ofast # performance is not different from --fast-math +###CXXFLAGS+= -g # FOR DEBUGGING ONLY + +# Optionally add debug flags to display the 
full list of flags (eg on Darwin) +###CXXFLAGS+= -v + +# Note: AR, CXX and FC are implicitly defined if not set externally +# See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html + +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) + CXXFLAGS += -mmacosx-version-min=11.3 +endif + +# Export CXXFLAGS (so that there is no need to check/define it again in cudacpp_src.mk) +export CXXFLAGS + +#------------------------------------------------------------------------------- + +#=== Configure the GPU compiler (CUDA or HIP) +#=== (note, this is done also for C++, as NVTX and CURAND/ROCRAND are also needed by the C++ backends) + +# Set CUDA_HOME from the path to nvcc, if it exists +override CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null)) + +# Set HIP_HOME from the path to hipcc, if it exists +override HIP_HOME = $(patsubst %%/bin/hipcc,%%,$(shell which hipcc 2>/dev/null)) + +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists +# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) +ifneq ($(CUDA_HOME),) + USE_NVTX ?=-DUSE_NVTX + CUDA_INC = -I$(CUDA_HOME)/include/ +else + override USE_NVTX= + override CUDA_INC= +endif + +# NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) +# - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. +# If both CUDA and HIP were installed, then CUDA took precedence over HIP, and the only way to force HIP builds was to disable +# CUDA builds by setting CUDA_HOME to an invalid value (as CUDA_HOME took precdence over PATH to find the installation of nvcc). +# Similarly, C++-only builds could be forced by setting CUDA_HOME and/or HIP_HOME to invalid values. 
A check for an invalid nvcc +# in CUDA_HOME or an invalid hipcc HIP_HOME was necessary to ensure this logic, and had to be performed at the very beginning. +# - In the new implementation (PR #798), separate individual builds are performed for one specific C++/AVX mode, for CUDA or +# for HIP. The choice of the type of build is taken depending on the value of the BACKEND variable (replacing the AVX variable). +# Unlike what happened in the past, nvcc and hipcc must have already been added to PATH. Using 'which nvcc' and 'which hipcc', +# their existence and their location is checked, and the variables CUDA_HOME and HIP_HOME are internally set by this makefile. +# This must be still done before backend-specific customizations, e.g. because CURAND and NVTX are also used in C++ builds. +# Note also that a preliminary check for nvcc and hipcc if BACKEND is cuda or hip is performed in cudacpp_config.mk. +# - Note also that the REQUIRE_CUDA variable (which was used in the past, e.g. for CI tests on GPU #443) is now (PR #798) no +# longer necessary, as it is now equivalent to BACKEND=cuda. Similarly, there is no need to introduce a REQUIRE_HIP variable. + +#=== Configure the CUDA or HIP compiler (only for the CUDA and HIP backends) +#=== (NB: throughout all makefiles, an empty GPUCC is used to indicate that this is a C++ build, i.e. that BACKEND is neither cuda nor hip!) 
+ +ifeq ($(BACKEND),cuda) + + # If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA builds (issue #505) + # This is because it is impossible to pass this to "GPUFLAGS += -ccbin " below + ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache " from outside + $(error BACKEND=$(BACKEND) but CUDA builds are not supported for multi-word CXX "$(CXX)") + endif + + # Set GPUCC as $(CUDA_HOME)/bin/nvcc (it was already checked above that this exists) + GPUCC = $(CUDA_HOME)/bin/nvcc + XCOMPILERFLAG = -Xcompiler + GPULANGUAGE = cu + GPUSUFFIX = cuda + + # Basic compiler flags (optimization and includes) + GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + + # NVidia CUDA architecture flags + # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html + # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). + # This will embed device code for 70, and PTX for 70+. + # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). + # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). 
+ MADGRAPH_CUDA_ARCHITECTURE ?= 70 + ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 + ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 + comma:=, + GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + GPUFLAGS += $(GPUARCHFLAGS) + + # Other NVidia-specific flags + CUDA_OPTFLAGS = -lineinfo + GPUFLAGS += $(CUDA_OPTFLAGS) + + # NVCC version + ###GPUCC_VERSION = $(shell $(GPUCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1) + + # Fast math + GPUFLAGS += -use_fast_math + + # Extra build warnings + ###GPUFLAGS += $(XCOMPILERFLAG) -Wall $(XCOMPILERFLAG) -Wextra $(XCOMPILERFLAG) -Wshadow + + # CUDA includes and NVTX + GPUFLAGS += $(CUDA_INC) $(USE_NVTX) + + # C++ standard + GPUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h + + # For nvcc, use -maxrregcount to control the maximum number of registries (this does not exist in hipcc) + # Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 96 # degrades throughput: 4.1E8 (16384 32 12) up to 4.5E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 64 # degrades throughput: 1.7E8 (16384 32 12) flat at 1.7E8 (65536 128 12) + + # Set the host C++ compiler for 
nvcc via "-ccbin " + # (NB issue #505: this must be a single word, "clang++ --gcc-toolchain..." is not supported) + GPUFLAGS += -ccbin $(shell which $(subst ccache ,,$(CXX))) + + # Allow newer (unsupported) C++ compilers with older versions of CUDA if ALLOW_UNSUPPORTED_COMPILER_IN_CUDA is set (#504) + ifneq ($(origin ALLOW_UNSUPPORTED_COMPILER_IN_CUDA),undefined) + GPUFLAGS += -allow-unsupported-compiler + endif + +else ifeq ($(BACKEND),hip) + + # Set GPUCC as $(HIP_HOME)/bin/hipcc (it was already checked above that this exists) + GPUCC = $(HIP_HOME)/bin/hipcc + XCOMPILERFLAG = + GPULANGUAGE = hip + GPUSUFFIX = hip + + # Basic compiler flags (optimization and includes) + GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + + # AMD HIP architecture flags + GPUARCHFLAGS = --offload-arch=gfx90a + GPUFLAGS += $(GPUARCHFLAGS) + + # Other AMD-specific flags + GPUFLAGS += -target x86_64-linux-gnu -DHIP_PLATFORM=amd + + # Fast math (is -DHIP_FAST_MATH equivalent to -ffast-math?) + GPUFLAGS += -DHIP_FAST_MATH + + # Extra build warnings + ###GPUFLAGS += $(XCOMPILERFLAG) -Wall $(XCOMPILERFLAG) -Wextra $(XCOMPILERFLAG) -Wshadow + + # HIP includes + HIP_INC = -I$(HIP_HOME)/include/ + GPUFLAGS += $(HIP_INC) + + # C++ standard + GPUFLAGS += -std=c++17 + +else + + # Backend is neither cuda nor hip + override GPUCC= + override GPUFLAGS= + + # Sanity check, this should never happen: if GPUCC is empty, then this is a C++ build, i.e. BACKEND is neither cuda nor hip. + # In practice, in the following, "ifeq ($(GPUCC),)" is equivalent to "ifneq ($(findstring cpp,$(BACKEND)),)". + # Conversely, note that GPUFLAGS is non-empty also for C++ builds, but it is never used in that case. + ifeq ($(findstring cpp,$(BACKEND)),) + $(error INTERNAL ERROR! 
Unknown backend BACKEND='$(BACKEND)': supported backends are $(foreach backend,$(SUPPORTED_BACKENDS),'$(backend)')) + endif + +endif + +# Export GPUCC, GPUFLAGS, GPULANGUAGE, GPUSUFFIX (so that there is no need to check/define them again in cudacpp_src.mk) +export GPUCC +export GPUFLAGS +export GPULANGUAGE +export GPUSUFFIX + +#------------------------------------------------------------------------------- + +#=== Configure ccache for C++ and CUDA/HIP builds + +# Enable ccache if USECCACHE=1 +ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) + override CXX:=ccache $(CXX) +endif +#ifeq ($(USECCACHE)$(shell echo $(AR) | grep ccache),1) +# override AR:=ccache $(AR) +#endif +ifneq ($(GPUCC),) + ifeq ($(USECCACHE)$(shell echo $(GPUCC) | grep ccache),1) + override GPUCC:=ccache $(GPUCC) + endif +endif + +#------------------------------------------------------------------------------- + +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) 
+export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../test +TESTDIRLOCAL = ../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq ($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + +#=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP + +# PowerPC-specific CXX compiler flags (being reviewed) +ifeq ($(UNAME_P),ppc64le) + CXXFLAGS+= -mcpu=power9 -mtune=power9 # gains ~2-3%% both for cppnone and cppsse4 + # Throughput references without the extra flags below: cppnone=1.41-1.42E6, cppsse4=2.15-2.19E6 + ###CXXFLAGS+= -DNO_WARN_X86_INTRINSICS # no change + ###CXXFLAGS+= -fpeel-loops # no change + ###CXXFLAGS+= -funroll-loops # gains ~1%% for cppnone, loses ~1%% for cppsse4 + ###CXXFLAGS+= -ftree-vectorize # no change + ###CXXFLAGS+= -flto # would increase to cppnone=4.08-4.12E6, cppsse4=4.99-5.03E6! 
+else + ###CXXFLAGS+= -flto # also on Intel this would increase throughputs by a factor 2 to 4... + ######CXXFLAGS+= -fno-semantic-interposition # no benefit (neither alone, nor combined with -flto) +endif + +# PowerPC-specific CUDA/HIP compiler flags (to be reviewed!) +ifeq ($(UNAME_P),ppc64le) + GPUFLAGS+= $(XCOMPILERFLAG) -mno-float128 +endif + +#------------------------------------------------------------------------------- + +#=== Configure defaults for OMPFLAGS + +# Set the default OMPFLAGS choice +ifneq ($(findstring hipcc,$(GPUCC)),) + override OMPFLAGS = # disable OpenMP MT when using hipcc #802 +else ifneq ($(shell $(CXX) --version | egrep '^Intel'),) + override OMPFLAGS = -fopenmp + ###override OMPFLAGS = # disable OpenMP MT on Intel (was ok without GPUCC but not ok with GPUCC before #578) +else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) + override OMPFLAGS = -fopenmp + ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) + override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) + ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) +else + override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms + ###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) +endif + +#------------------------------------------------------------------------------- + +#=== Configure defaults and check if user-defined choices exist for RNDGEN (legacy!), HASCURAND, HASHIPRAND + +# If the legacy RNDGEN exists, this take precedence over any HASCURAND choice (but a warning is printed out) +###$(info RNDGEN=$(RNDGEN)) +ifneq ($(RNDGEN),) + $(warning Environment variable RNDGEN is no longer supported, please use HASCURAND instead!) + ifeq ($(RNDGEN),hasCurand) + override HASCURAND = $(RNDGEN) + else ifeq ($(RNDGEN),hasNoCurand) + override HASCURAND = $(RNDGEN) + else ifneq ($(RNDGEN),hasNoCurand) + $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported - but use HASCURAND instead!) + endif +endif + +# Set the default HASCURAND (curand random number generator) choice, if no prior choice exists for HASCURAND +# (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) +ifeq ($(HASCURAND),) + ifeq ($(GPUCC),) # CPU-only build + ifneq ($(CUDA_HOME),) + # By default, assume that curand is installed if a CUDA installation exists + override HASCURAND = hasCurand + else + override HASCURAND = hasNoCurand + endif + else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + override HASCURAND = hasCurand + else # non-Nvidia GPU build + override HASCURAND = hasNoCurand + endif +endif + +# Set the default HASHIPRAND (hiprand random number generator) choice, if no prior choice exists for HASHIPRAND +# (NB: allow HASHIPRAND=hasHiprand even if $(GPUCC) does not point to hipcc: assume HIP_HOME was defined correctly...) 
+ifeq ($(HASHIPRAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASHIPRAND = hasNoHiprand + else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build + override HASHIPRAND = hasHiprand + else # non-AMD GPU build + override HASHIPRAND = hasNoHiprand + endif +endif + +#------------------------------------------------------------------------------- + +#=== Set the CUDA/HIP/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD + +# Set the build flags appropriate to OMPFLAGS +$(info OMPFLAGS=$(OMPFLAGS)) +CXXFLAGS += $(OMPFLAGS) + +# Set the build flags appropriate to each BACKEND choice (example: "make BACKEND=cppnone") +# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro] +# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] +ifeq ($(UNAME_P),ppc64le) + ifeq ($(BACKEND),cppsse4) + override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers) + else ifeq ($(BACKEND),cppavx2) + $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) + else ifeq ($(BACKEND),cpp512y) + $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) + else ifeq ($(BACKEND),cpp512z) + $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) + endif +else ifeq ($(UNAME_P),arm) + ifeq ($(BACKEND),cppsse4) + override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) + else ifeq ($(BACKEND),cppavx2) + $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) + else ifeq ($(BACKEND),cpp512y) + $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) + else ifeq ($(BACKEND),cpp512z) + $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) + endif +else 
ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 + ifeq ($(BACKEND),cppnone) + override AVXFLAGS = -mno-sse3 # no SIMD + else ifeq ($(BACKEND),cppsse4) + override AVXFLAGS = -mno-avx # SSE4.2 with 128 width (xmm registers) + else ifeq ($(BACKEND),cppavx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq ($(BACKEND),cpp512y) + override AVXFLAGS = -march=skylake -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(BACKEND),cpp512z) + override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + endif +else + ifeq ($(BACKEND),cppnone) + override AVXFLAGS = -march=x86-64 # no SIMD (see #588) + else ifeq ($(BACKEND),cppsse4) + override AVXFLAGS = -march=nehalem # SSE4.2 with 128 width (xmm registers) + else ifeq ($(BACKEND),cppavx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq ($(BACKEND),cpp512y) + override AVXFLAGS = -march=skylake-avx512 -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(BACKEND),cpp512z) + override AVXFLAGS = -march=skylake-avx512 -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + endif +endif +# For the moment, use AVXFLAGS everywhere (in C++ builds): eventually, use them only in encapsulated implementations? 
+ifeq ($(GPUCC),) + CXXFLAGS+= $(AVXFLAGS) +endif + +# Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") +$(info FPTYPE='$(FPTYPE)') +ifeq ($(FPTYPE),d) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE +else ifeq ($(FPTYPE),f) + CXXFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT +else ifeq ($(FPTYPE),m) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT +else + $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported) +endif + +# Set the build flags appropriate to each HELINL choice (example: "make HELINL=1") +$(info HELINL='$(HELINL)') +ifeq ($(HELINL),1) + CXXFLAGS += -DMGONGPU_INLINE_HELAMPS + GPUFLAGS += -DMGONGPU_INLINE_HELAMPS +else ifneq ($(HELINL),0) + $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported) +endif + +# Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1") +$(info HRDCOD='$(HRDCOD)') +ifeq ($(HRDCOD),1) + CXXFLAGS += -DMGONGPU_HARDCODE_PARAM + GPUFLAGS += -DMGONGPU_HARDCODE_PARAM +else ifneq ($(HRDCOD),0) + $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) +endif + +#=== Set the CUDA/HIP/C++ compiler and linker flags appropriate to user-defined choices of HASCURAND, HASHIPRAND + +$(info HASCURAND=$(HASCURAND)) +$(info HASHIPRAND=$(HASHIPRAND)) +override RNDCXXFLAGS= +override RNDLIBFLAGS= + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASCURAND choice (example: "make HASCURAND=hasNoCurand") +ifeq ($(HASCURAND),hasNoCurand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_CURAND +else ifeq ($(HASCURAND),hasCurand) + override RNDLIBFLAGS += -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+else + $(error Unknown HASCURAND='$(HASCURAND)': only 'hasCurand' and 'hasNoCurand' are supported) +endif + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASHIPRAND choice (example: "make HASHIPRAND=hasNoHiprand") +ifeq ($(HASHIPRAND),hasNoHiprand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_HIPRAND +else ifeq ($(HASHIPRAND),hasHiprand) + override RNDLIBFLAGS += -L$(HIP_HOME)/lib/ -lhiprand +else ifneq ($(HASHIPRAND),hasHiprand) + $(error Unknown HASHIPRAND='$(HASHIPRAND)': only 'hasHiprand' and 'hasNoHiprand' are supported) +endif + +#$(info RNDCXXFLAGS=$(RNDCXXFLAGS)) +#$(info RNDLIBFLAGS=$(RNDLIBFLAGS)) + +#------------------------------------------------------------------------------- + +#=== Configure Position-Independent Code +CXXFLAGS += -fPIC +GPUFLAGS += $(XCOMPILERFLAG) -fPIC + +#------------------------------------------------------------------------------- + +#=== Configure build directories and build lockfiles === + +# Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) +# (Rationale: avoid mixing of builds with different random number generators) +override TAG = $(patsubst cpp%%,%%,$(BACKEND))_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASHIPRAND) + +# Export TAG (so that there is no need to check/define it again in cudacpp_src.mk) +export TAG + +# Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 +override BUILDDIR = $(CUDACPP_BUILDDIR) +ifeq ($(USEBUILDDIR),1) + override LIBDIR = ../lib/$(BUILDDIR) + override LIBDIRRPATH = '$$ORIGIN/$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR == 1)) +else + override LIBDIR = ../lib + override LIBDIRRPATH = '$$ORIGIN/$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR != 1)) +endif +###override INCDIR = ../../include +###$(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG)) + +# On Linux, set rpath to LIBDIR to make it unnecessary to use 
LD_LIBRARY_PATH +# Use relative paths with respect to the executables or shared libraries ($ORIGIN on Linux) +# On Darwin, building libraries with absolute paths in LIBDIR makes this unnecessary +ifeq ($(UNAME_S),Darwin) + override CXXLIBFLAGSRPATH = + override GPULIBFLAGSRPATH = + override CXXLIBFLAGSRPATH2 = + override GPULIBFLAGSRPATH2 = +else + # RPATH to gpu/cpp libs when linking executables + override CXXLIBFLAGSRPATH = -Wl,-rpath=$(LIBDIRRPATH) + override GPULIBFLAGSRPATH = -Xlinker -rpath=$(LIBDIRRPATH) + # RPATH to common lib when linking gpu/cpp libs + override CXXLIBFLAGSRPATH2 = -Wl,-rpath='$$ORIGIN' + override GPULIBFLAGSRPATH2 = -Xlinker -rpath='$$ORIGIN' +endif + +# Setting LD_LIBRARY_PATH or DYLD_LIBRARY_PATH in the RUNTIME is no longer necessary (neither on Linux nor on Mac) +override RUNTIME = + +#=============================================================================== +#=== Makefile TARGETS and build rules below +#=============================================================================== + +.PHONY: all $(DIRS) + +DIRS := $(wildcard P*) + +# Construct the library paths +cxx_proclibs := $(shell for dir in $(DIRS); do basename $$dir | awk -F_ '{print "mg5amc_"$$(NF-1)"_"$$NF"_cpp"}'; done) +gpu_proclibs := $(shell for dir in $(DIRS); do basename $$dir | awk -F_ '{print "mg5amc_"$$(NF-1)"_"$$NF"_$(GPUSUFFIX)"}'; done) + +ifeq ($(GPUCC),) + cxx_rwgt=$(BUILDDIR)/rwgt_driver_cpp.exe + rwgtlib := $(addprefix ,$(addsuffix /librwgt_cpp.so,$(DIRS))) +else + gpu_rwgt=$(BUILDDIR)/rwgt_driver_$(GPUSUFFIX).exe + rwgtlib := $(addprefix ,$(addsuffix /librwgt_$(GPUSUFFIX).so,$(DIRS))) +endif + +# Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) +.DEFAULT_GOAL := all.$(TAG) + +# First target (default goal) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(rwgtlib) $(cxx_rwgt) +else +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(rwgtlib) $(gpu_rwgt) +endif + +# Target (and build 
options): debug +MAKEDEBUG= +debug: OPTFLAGS = -g -O0 +debug: CUDA_OPTFLAGS = -G +debug: MAKEDEBUG := debug +debug: all.$(TAG) + +# Target: tag-specific build lockfiles +override oldtagsb=`if [ -d $(BUILDDIR) ]; then find $(BUILDDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)' -exec echo $(shell pwd)/{} \; ; fi` +$(BUILDDIR)/.build.$(TAG): + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi + @touch $(BUILDDIR)/.build.$(TAG) + +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) +# Added edgecase for HIP compilation +ifeq ($(shell $(CXX) --version | grep ^nvc++),) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +endif + +# # Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +# $(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +# $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +$(BUILDDIR)/rwgt_driver_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +$(BUILDDIR)/rwgt_driver_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) + +# # Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +# $(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) +# $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS 
+= $(RNDCXXFLAGS) +# $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) +# ifeq ($(HASCURAND),hasCurand) # curand headers, #679 +# $(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) +# endif +# ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers +# $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) +# endif + +# Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) +ifneq ($(shell $(CXX) --version | egrep '^(Intel)'),) +ifneq ($(GPUCC),) +GPUFLAGS += -Wno-deprecated-builtins +endif +endif + +# Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) +# This patch does remove the warning, but I prefer to keep it disabled for the moment... +###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option +###ifneq ($(GPUCC),) +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###endif +###endif + +#### Apply special build flags only to CPPProcess.o (-flto) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto + +#### Apply special build flags only to CPPProcess.o (AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) + +# Generic target and build rules: objects from C++ compilation +# (NB do not include CUDA_INC here! add it only for NVTX or curand #679) +$(BUILDDIR)/%%_cpp.o : %%.cc *.h ../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ + +# Generic target and build rules: objects from CUDA or HIP compilation +ifneq ($(GPUCC),) +$(BUILDDIR)/%%_$(GPUSUFFIX).o : %%.cc *.h ../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(GPUCC) $(CPPFLAGS) $(INCFLAGS) $(GPUFLAGS) -c -x $(GPULANGUAGE) $< -o $@ +endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): common (src) library +commonlib : $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so + +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../src/*.h ../src/*.cc $(BUILDDIR)/.build.$(TAG) + $(MAKE) -C ../src $(MAKEDEBUG) -f $(CUDACPP_SRC_MAKEFILE) + +#------------------------------------------------------------------------------- + +# processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +# ###$(info processid_short=$(processid_short)) + +# MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp +# cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +# cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o + +# ifneq ($(GPUCC),) +# MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) +# gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementKernels_$(GPUSUFFIX).o $(BUILDDIR)/BridgeKernels_$(GPUSUFFIX).o $(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o +# gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o +# endif + +# # Target (and build rules): C++ and CUDA/HIP shared libraries +# $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +# $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o +# $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) +# $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) + +# ifneq ($(GPUCC),) +# $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o +# $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += 
$(BUILDDIR)/fbridge_$(GPUSUFFIX).o
+# $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib)
+# $(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+# # Bypass std::filesystem completely to ease portability on LUMI #803
+# #ifneq ($(findstring hipcc,$(GPUCC)),)
+# # $(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs
+# #else
+# # $(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
+# #endif
+# endif
+
+#-------------------------------------------------------------------------------
+
+# Target (and build rules): Fortran include files
+###$(INCDIR)/%%.inc : ../%%.inc
+### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi
+### \cp $< $@
+
+#-------------------------------------------------------------------------------
+
+#HERE LOOP MAKE OVER P DIRECTORIES AND ADD RWGT_RUNNER_LIBS
+# Ensure each librwgt.a depends on its directory being built
+$(rwgtlib):
+	@$(MAKE) -C $(@D) VARIABLE=true
+
+# Target (and build rules): C++ and CUDA/HIP standalone executables
+$(cxx_rwgt): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
+$(cxx_rwgt): $(BUILDDIR)/rwgt_driver_cpp.o $(rwgtlib) # FIX: the %%_cpp.o pattern rule builds rwgt_driver_cpp.o (was rwgt_driver.o, which has no build rule)
+	$(CXX) -o $@ $(BUILDDIR)/rwgt_driver_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_proclibs) $(rwgtlib) # NOTE(review): MG5AMC_CXXLIB is only defined in commented-out code above - confirm; $(cxx_proclibs) are bare lib names, may need $(addprefix -l,...)
+
+ifneq ($(GPUCC),)
+ifneq ($(shell $(CXX) --version | grep ^Intel),)
+$(gpu_rwgt): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy')
+$(gpu_rwgt): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9')
+else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531
+$(gpu_rwgt): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc
+endif
+$(gpu_rwgt): LIBFLAGS += 
$(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
+$(gpu_rwgt): $(BUILDDIR)/rwgt_driver_$(GPUSUFFIX).o $(rwgtlib) # FIX: was $(BUILDDIR)/$(BUILDDIR)/rwgt_driver.o (duplicated BUILDDIR; pattern rule builds %%_$(GPUSUFFIX).o) and lib$(MG5AMC_GPULIB).so (MG5AMC_GPULIB undefined, only in commented-out code); mirror the C++ rule above
+	$(GPUCC) -o $@ $(BUILDDIR)/rwgt_driver_$(GPUSUFFIX).o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_proclibs) $(rwgtlib) # FIX: -l$(gpu_proclibs) would prefix only the first word of the list; pass the list bare like the C++ rule does
+endif
+
+#-------------------------------------------------------------------------------
+
+# Generic target and build rules: objects from Fortran compilation
+$(BUILDDIR)/%%_fortran.o : %%.f *.inc
+	@if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi
+	$(FC) -I. -c $< -o $@
+
+# Generic target and build rules: objects from Fortran compilation
+###$(BUILDDIR)/%%_fortran.o : %%.f *.inc
+### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi
+### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi
+### $(FC) -I. -I$(INCDIR) -c $< -o $@
+
+# Target (and build rules): Fortran standalone executables
+###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc
+
+# ifeq ($(UNAME_S),Darwin)
+# $(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375
+# endif
+# $(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH
+# $(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe)
+# ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802
+# $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++
+# else
+# $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe)
+# endif
+
+# ifneq ($(GPUCC),)
+# ifneq ($(shell $(CXX) --version | grep ^Intel),)
+# $(gpu_fcheckmain): LIBFLAGS += 
-lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +# $(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +# endif +# ifeq ($(UNAME_S),Darwin) +# $(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +# endif +# $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +# $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +# ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 +# $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 +# else +# $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) +# endif +# endif + +#------------------------------------------------------------------------------- + +# # Target (and build rules): test objects and test executable +# ifeq ($(GPUCC),) +# $(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +# $(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +# $(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +# $(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +# $(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions +# else +# $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) +# $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) +# $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt +# $(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +# $(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip 
the CUDA/HIP test of xxx functions +# endif + +# ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) +# ifeq ($(GPUCC),) +# $(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +# $(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +# $(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +# $(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests +# else +# $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) +# $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) +# $(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +# $(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests +# endif +# endif + +# ifeq ($(GPUCC),) +# $(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +# $(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +# $(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +# $(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o +# else +# $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) +# $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) +# ifneq ($(shell $(CXX) --version | grep ^Intel),) +# $(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +# $(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +# else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 +# $(gpu_testmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +# endif +# $(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +# $(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +# endif + +# ifeq ($(GPUCC),) +# $(cxx_testmain): $(GTESTLIBS) +# $(cxx_testmain): INCFLAGS += $(GTESTINC) +# $(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc 
+# else +# $(gpu_testmain): $(GTESTLIBS) +# $(gpu_testmain): INCFLAGS += $(GTESTINC) +# $(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +# endif + +# ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) +# ifneq ($(OMPFLAGS),) +# ifneq ($(shell $(CXX) --version | egrep '^Intel'),) +# $(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +# else ifneq ($(shell $(CXX) --version | egrep '^clang'),) +# $(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +# ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +# ###$(cxx_testmain): LIBFLAGS += ???? # OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +# else +# $(cxx_testmain): LIBFLAGS += -lgomp +# endif +# endif +# endif + +# # Test quadmath in testmisc.cc tests for constexpr_math #627 +# ###ifeq ($(GPUCC),) +# ###$(cxx_testmain): LIBFLAGS += -lquadmath +# ###else +# ###$(gpu_testmain): LIBFLAGS += -lquadmath +# ###endif + +# # Bypass std::filesystem completely to ease portability on LUMI #803 +# ###ifneq ($(findstring hipcc,$(GPUCC)),) +# ###$(gpu_testmain): LIBFLAGS += -lstdc++fs +# ###endif + +# ifeq ($(GPUCC),) # link only runTest_cpp.o +# $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +# $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) +# $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) +# else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +# $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +# $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +# ifneq 
($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 +# $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 +# else +# $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda +# endif +# endif + +# # Use target gtestlibs to build only googletest +# ifneq ($(GTESTLIBS),) +# gtestlibs: $(GTESTLIBS) +# endif + +# # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 +# $(GTESTLIBS): +# ifneq ($(shell which flock 2>/dev/null),) +# @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi +# flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) +# else +# if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi +# endif + +#------------------------------------------------------------------------------- + +# Target: build all targets in all BACKEND modes (each BACKEND mode in a separate build directory) +# Split the bldall target into separate targets to allow parallel 'make -j bldall' builds +# (Obsolete hack, no longer needed as there is no INCDIR: add a fbridge.inc dependency to bldall, to ensure it is only copied once for all BACKEND modes) +bldcuda: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=cuda -f $(CUDACPP_MAKEFILE) + +bldhip: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=hip -f $(CUDACPP_MAKEFILE) + +bldnone: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=cppnone -f $(CUDACPP_MAKEFILE) + +bldsse4: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=cppsse4 -f $(CUDACPP_MAKEFILE) + +bldavx2: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=cppavx2 -f $(CUDACPP_MAKEFILE) + +bld512y: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=cpp512y -f $(CUDACPP_MAKEFILE) + +bld512z: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=cpp512z -f $(CUDACPP_MAKEFILE) + +ifeq ($(UNAME_P),ppc64le) +###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 +bldavxs: bldnone 
bldsse4 +else ifeq ($(UNAME_P),arm) +###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 +bldavxs: bldnone bldsse4 +else +###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavx2 bld512y bld512z +bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z +endif + +ifneq ($(HIP_HOME),) +ifneq ($(CUDA_HOME),) +bldall: bldhip bldcuda bldavxs +else +bldall: bldhip bldavxs +endif +else +ifneq ($(CUDA_HOME),) +bldall: bldcuda bldavxs +else +bldall: bldavxs +endif +endif + +#------------------------------------------------------------------------------- + +# Target: clean the builds +.PHONY: clean + +clean: +ifeq ($(USEBUILDDIR),1) + rm -rf $(BUILDDIR) +else + rm -f $(BUILDDIR)/.build.* $(BUILDDIR)/*.o $(BUILDDIR)/*.exe + rm -f $(LIBDIR)/lib*.so +endif + $(MAKE) -C ../src clean -f $(CUDACPP_SRC_MAKEFILE) +### rm -rf $(INCDIR) + +cleanall: + @echo + $(MAKE) USEBUILDDIR=0 clean -f $(CUDACPP_MAKEFILE) + @echo + $(MAKE) USEBUILDDIR=0 -C ../src cleanall -f $(CUDACPP_SRC_MAKEFILE) + rm -rf build.* + +# Target: clean the builds as well as the gtest installation(s) +distclean: cleanall +ifneq ($(wildcard $(TESTDIRCOMMON)),) + $(MAKE) -C $(TESTDIRCOMMON) clean +endif + $(MAKE) -C $(TESTDIRLOCAL) clean + +#------------------------------------------------------------------------------- + +# Target: show system and compiler information +info: + @echo "" + @uname -spn # e.g. 
Linux nodename.cern.ch x86_64 +ifeq ($(UNAME_S),Darwin) + @sysctl -a | grep -i brand + @sysctl -a | grep machdep.cpu | grep features || true + @sysctl -a | grep hw.physicalcpu: + @sysctl -a | grep hw.logicalcpu: +else + @cat /proc/cpuinfo | grep "model name" | sort -u + @cat /proc/cpuinfo | grep "flags" | sort -u + @cat /proc/cpuinfo | grep "cpu cores" | sort -u + @cat /proc/cpuinfo | grep "physical id" | sort -u +endif + @echo "" +ifneq ($(shell which nvidia-smi 2>/dev/null),) + nvidia-smi -L + @echo "" +endif + @echo USECCACHE=$(USECCACHE) +ifeq ($(USECCACHE),1) + ccache --version | head -1 +endif + @echo "" + @echo GPUCC=$(GPUCC) +ifneq ($(GPUCC),) + $(GPUCC) --version +endif + @echo "" + @echo CXX=$(CXX) +ifneq ($(shell $(CXX) --version | grep ^clang),) + @echo $(CXX) -v + @$(CXX) -v |& egrep -v '(Found|multilib)' + @readelf -p .comment `$(CXX) -print-libgcc-file-name` |& grep 'GCC: (GNU)' | grep -v Warning | sort -u | awk '{print "GCC toolchain:",$$5}' +else + $(CXX) --version +endif + @echo "" + @echo FC=$(FC) + $(FC) --version + +#------------------------------------------------------------------------------- + +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] 
+# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck + +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) +runTest: all.$(TAG) +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif + +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) +runCheck: all.$(TAG) +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif + +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) +runFcheck: all.$(TAG) +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif + +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) +cmpFcheck: all.$(TAG) + @echo +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi
+else
+	@echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2"
+	@echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2"
+	@me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi
+endif
+
+# Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck)
+cuda-memcheck: all.$(TAG)
+	$(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2
+
+#-------------------------------------------------------------------------------
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk
new file mode 100644
index 0000000000..98a5a289e9
--- 
/dev/null +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk @@ -0,0 +1,1128 @@ +# Copyright (C) 2020-2024 CERN and UCLouvain. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. +# Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. + +#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) +#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts +#=== NB: use 'override' to ensure that the value can not be modified from the outside +override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') + +#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories +override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk + +#------------------------------------------------------------------------------- + +#=== Include cudacpp_config.mk + +# Check that the user-defined choices of BACKEND, FPTYPE, HELINL, HRDCOD are supported (and configure defaults if no user-defined choices exist) +# Stop with an error if BACKEND=cuda and nvcc is missing or if BACKEND=hip and hipcc is missing. 
+# Determine CUDACPP_BUILDDIR from a DIRTAG based on BACKEND, FPTYPE, HELINL, HRDCOD and from the user-defined choice of USEBUILDDIR +include ../../src/cudacpp_config.mk + +# Export CUDACPP_BUILDDIR (so that there is no need to check/define it again in cudacpp_src.mk) +export CUDACPP_BUILDDIR + +#------------------------------------------------------------------------------- + +#=== Use bash in the Makefile (https://www.gnu.org/software/make/manual/html_node/Choosing-the-Shell.html) + +SHELL := /bin/bash + +#------------------------------------------------------------------------------- + +#=== Detect O/S and architecture (assuming uname is available, https://en.wikipedia.org/wiki/Uname) + +# Detect O/S kernel (Linux, Darwin...) +UNAME_S := $(shell uname -s) +###$(info UNAME_S='$(UNAME_S)') + +# Detect architecture (x86_64, ppc64le...) +UNAME_P := $(shell uname -p) +###$(info UNAME_P='$(UNAME_P)') + +#------------------------------------------------------------------------------- + +#=== Include the common MG5aMC Makefile options + +# OM: including make_opts is crucial for MG5aMC flag consistency/documentation +# AV: disable the inclusion of make_opts if the file has not been generated (standalone cudacpp) +ifneq ($(wildcard ../../Source/make_opts),) + include ../../Source/make_opts +endif + +#------------------------------------------------------------------------------- + +#=== Redefine BACKEND if the current value is 'cppauto' + +# Set the default BACKEND choice corresponding to 'cppauto' (the 'best' C++ vectorization available: eventually use native instead?) 
+ifeq ($(BACKEND),cppauto) + ifeq ($(UNAME_P),ppc64le) + override BACKEND = cppsse4 + else ifeq ($(UNAME_P),arm) + override BACKEND = cppsse4 + else ifeq ($(wildcard /proc/cpuinfo),) + override BACKEND = cppnone + ###$(warning Using BACKEND='$(BACKEND)' because host SIMD features cannot be read from /proc/cpuinfo) + else ifeq ($(shell grep -m1 -c avx512vl /proc/cpuinfo)$(shell $(CXX) --version | grep ^clang),1) + override BACKEND = cpp512y + else + override BACKEND = cppavx2 + ###ifneq ($(shell grep -m1 -c avx512vl /proc/cpuinfo),1) + ### $(warning Using BACKEND='$(BACKEND)' because host does not support avx512vl) + ###else + ### $(warning Using BACKEND='$(BACKEND)' because this is faster than avx512vl for clang) + ###endif + endif + $(info BACKEND=$(BACKEND) (was cppauto)) +else + $(info BACKEND='$(BACKEND)') +endif + +#------------------------------------------------------------------------------- + +#=== Configure the C++ compiler + +CXXFLAGS = $(OPTFLAGS) -std=c++17 -Wall -Wshadow -Wextra +ifeq ($(shell $(CXX) --version | grep ^nvc++),) + CXXFLAGS += -ffast-math # see issue #117 +endif +###CXXFLAGS+= -Ofast # performance is not different from --fast-math +###CXXFLAGS+= -g # FOR DEBUGGING ONLY + +# Optionally add debug flags to display the full list of flags (eg on Darwin) +###CXXFLAGS+= -v + +# Note: AR, CXX and FC are implicitly defined if not set externally +# See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html + +# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" +ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) + CXXFLAGS += -mmacosx-version-min=11.3 +endif + +# Export CXXFLAGS (so that there is no need to check/define it again in cudacpp_src.mk) +export CXXFLAGS + +#------------------------------------------------------------------------------- + +#=== Configure the GPU compiler (CUDA or HIP) +#=== (note, this is done also for C++, as NVTX and 
CURAND/ROCRAND are also needed by the C++ backends) + +# Set CUDA_HOME from the path to nvcc, if it exists +override CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null)) + +# Set HIP_HOME from the path to hipcc, if it exists +override HIP_HOME = $(patsubst %%/bin/hipcc,%%,$(shell which hipcc 2>/dev/null)) + +# Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists +# (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) +ifneq ($(CUDA_HOME),) + USE_NVTX ?=-DUSE_NVTX + CUDA_INC = -I$(CUDA_HOME)/include/ +else + override USE_NVTX= + override CUDA_INC= +endif + +# NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) +# - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. +# If both CUDA and HIP were installed, then CUDA took precedence over HIP, and the only way to force HIP builds was to disable +# CUDA builds by setting CUDA_HOME to an invalid value (as CUDA_HOME took precedence over PATH to find the installation of nvcc). +# Similarly, C++-only builds could be forced by setting CUDA_HOME and/or HIP_HOME to invalid values. A check for an invalid nvcc +# in CUDA_HOME or an invalid hipcc in HIP_HOME was necessary to ensure this logic, and had to be performed at the very beginning. +# - In the new implementation (PR #798), separate individual builds are performed for one specific C++/AVX mode, for CUDA or +# for HIP. The choice of the type of build is taken depending on the value of the BACKEND variable (replacing the AVX variable). +# Unlike what happened in the past, nvcc and hipcc must have already been added to PATH. Using 'which nvcc' and 'which hipcc', +# their existence and their location is checked, and the variables CUDA_HOME and HIP_HOME are internally set by this makefile. +# This must be still done before backend-specific customizations, e.g. 
because CURAND and NVTX are also used in C++ builds. +# Note also that a preliminary check for nvcc and hipcc if BACKEND is cuda or hip is performed in cudacpp_config.mk. +# - Note also that the REQUIRE_CUDA variable (which was used in the past, e.g. for CI tests on GPU #443) is now (PR #798) no +# longer necessary, as it is now equivalent to BACKEND=cuda. Similarly, there is no need to introduce a REQUIRE_HIP variable. + +#=== Configure the CUDA or HIP compiler (only for the CUDA and HIP backends) +#=== (NB: throughout all makefiles, an empty GPUCC is used to indicate that this is a C++ build, i.e. that BACKEND is neither cuda nor hip!) + +ifeq ($(BACKEND),cuda) + + # If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA builds (issue #505) + # This is because it is impossible to pass this to "GPUFLAGS += -ccbin " below + ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache " from outside + $(error BACKEND=$(BACKEND) but CUDA builds are not supported for multi-word CXX "$(CXX)") + endif + + # Set GPUCC as $(CUDA_HOME)/bin/nvcc (it was already checked above that this exists) + GPUCC = $(CUDA_HOME)/bin/nvcc + XCOMPILERFLAG = -Xcompiler + GPULANGUAGE = cu + GPUSUFFIX = cuda + + # Basic compiler flags (optimization and includes) + GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + + # NVidia CUDA architecture flags + # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html + # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ + # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). + # This will embed device code for 70, and PTX for 70+. + # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). + # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). 
+ MADGRAPH_CUDA_ARCHITECTURE ?= 70 + ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 + ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 + comma:=, + GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) + GPUFLAGS += $(GPUARCHFLAGS) + + # Other NVidia-specific flags + CUDA_OPTFLAGS = -lineinfo + GPUFLAGS += $(CUDA_OPTFLAGS) + + # NVCC version + ###GPUCC_VERSION = $(shell $(GPUCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1) + + # Fast math + GPUFLAGS += -use_fast_math + + # Extra build warnings + ###GPUFLAGS += $(XCOMPILERFLAG) -Wall $(XCOMPILERFLAG) -Wextra $(XCOMPILERFLAG) -Wshadow + + # CUDA includes and NVTX + GPUFLAGS += $(CUDA_INC) $(USE_NVTX) + + # C++ standard + GPUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h + + # For nvcc, use -maxrregcount to control the maximum number of registries (this does not exist in hipcc) + # Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 96 # degrades throughput: 4.1E8 (16384 32 12) up to 4.5E8 (65536 128 12) + ###GPUFLAGS+= --maxrregcount 64 # degrades throughput: 1.7E8 (16384 32 12) flat at 1.7E8 (65536 128 12) + + # Set the host C++ compiler for 
nvcc via "-ccbin " + # (NB issue #505: this must be a single word, "clang++ --gcc-toolchain..." is not supported) + GPUFLAGS += -ccbin $(shell which $(subst ccache ,,$(CXX))) + + # Allow newer (unsupported) C++ compilers with older versions of CUDA if ALLOW_UNSUPPORTED_COMPILER_IN_CUDA is set (#504) + ifneq ($(origin ALLOW_UNSUPPORTED_COMPILER_IN_CUDA),undefined) + GPUFLAGS += -allow-unsupported-compiler + endif + +else ifeq ($(BACKEND),hip) + + # Set GPUCC as $(HIP_HOME)/bin/hipcc (it was already checked above that this exists) + GPUCC = $(HIP_HOME)/bin/hipcc + XCOMPILERFLAG = + GPULANGUAGE = hip + GPUSUFFIX = hip + + # Basic compiler flags (optimization and includes) + GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + + # AMD HIP architecture flags + GPUARCHFLAGS = --offload-arch=gfx90a + GPUFLAGS += $(GPUARCHFLAGS) + + # Other AMD-specific flags + GPUFLAGS += -target x86_64-linux-gnu -DHIP_PLATFORM=amd + + # Fast math (is -DHIP_FAST_MATH equivalent to -ffast-math?) + GPUFLAGS += -DHIP_FAST_MATH + + # Extra build warnings + ###GPUFLAGS += $(XCOMPILERFLAG) -Wall $(XCOMPILERFLAG) -Wextra $(XCOMPILERFLAG) -Wshadow + + # HIP includes + HIP_INC = -I$(HIP_HOME)/include/ + GPUFLAGS += $(HIP_INC) + + # C++ standard + GPUFLAGS += -std=c++17 + +else + + # Backend is neither cuda nor hip + override GPUCC= + override GPUFLAGS= + + # Sanity check, this should never happen: if GPUCC is empty, then this is a C++ build, i.e. BACKEND is neither cuda nor hip. + # In practice, in the following, "ifeq ($(GPUCC),)" is equivalent to "ifneq ($(findstring cpp,$(BACKEND)),)". + # Conversely, note that GPUFLAGS is non-empty also for C++ builds, but it is never used in that case. + ifeq ($(findstring cpp,$(BACKEND)),) + $(error INTERNAL ERROR! 
Unknown backend BACKEND='$(BACKEND)': supported backends are $(foreach backend,$(SUPPORTED_BACKENDS),'$(backend)')) + endif + +endif + +# Export GPUCC, GPUFLAGS, GPULANGUAGE, GPUSUFFIX (so that there is no need to check/define them again in cudacpp_src.mk) +export GPUCC +export GPUFLAGS +export GPULANGUAGE +export GPUSUFFIX + +#------------------------------------------------------------------------------- + +#=== Configure ccache for C++ and CUDA/HIP builds + +# Enable ccache if USECCACHE=1 +ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) + override CXX:=ccache $(CXX) +endif +#ifeq ($(USECCACHE)$(shell echo $(AR) | grep ccache),1) +# override AR:=ccache $(AR) +#endif +ifneq ($(GPUCC),) + ifeq ($(USECCACHE)$(shell echo $(GPUCC) | grep ccache),1) + override GPUCC:=ccache $(GPUCC) + endif +endif + +#------------------------------------------------------------------------------- + +#=== Configure common compiler flags for C++ and CUDA/HIP + +INCFLAGS = -I. +OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here + +# Dependency on src directory +ifeq ($(GPUCC),) +MG5AMC_COMMONLIB = mg5amc_common_cpp +else +MG5AMC_COMMONLIB = mg5amc_common_$(GPUSUFFIX) +endif +LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +INCFLAGS += -I../../src + +# Compiler-specific googletest build directory (#125 and #738) +ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) + override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) + override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) + override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) +else + override CXXNAME = unknown +endif +###$(info CXXNAME=$(CXXNAME)) +override CXXNAMESUFFIX = _$(CXXNAME) + +# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) 
+export CXXNAMESUFFIX + +# Dependency on test directory +# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) +# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) +###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation +###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation +TESTDIRCOMMON = ../../../../../test +TESTDIRLOCAL = ../../test +ifneq ($(wildcard $(GTEST_ROOT)),) + TESTDIR = +else ifneq ($(LOCALGTEST),) + TESTDIR=$(TESTDIRLOCAL) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) + TESTDIR = $(TESTDIRCOMMON) + GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) +else + TESTDIR = +endif +ifneq ($(GTEST_ROOT),) + GTESTLIBDIR = $(GTEST_ROOT)/lib64/ + GTESTLIBS = $(GTESTLIBDIR)/libgtest.a + GTESTINC = -I$(GTEST_ROOT)/include +else + GTESTLIBDIR = + GTESTLIBS = + GTESTINC = +endif +###$(info GTEST_ROOT = $(GTEST_ROOT)) +###$(info LOCALGTEST = $(LOCALGTEST)) +###$(info TESTDIR = $(TESTDIR)) + +#------------------------------------------------------------------------------- + +#=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP + +# PowerPC-specific CXX compiler flags (being reviewed) +ifeq ($(UNAME_P),ppc64le) + CXXFLAGS+= -mcpu=power9 -mtune=power9 # gains ~2-3%% both for cppnone and cppsse4 + # Throughput references without the extra flags below: cppnone=1.41-1.42E6, cppsse4=2.15-2.19E6 + ###CXXFLAGS+= -DNO_WARN_X86_INTRINSICS # no change + ###CXXFLAGS+= -fpeel-loops # no change + ###CXXFLAGS+= -funroll-loops # gains ~1%% for cppnone, loses ~1%% for cppsse4 + ###CXXFLAGS+= -ftree-vectorize # no change + ###CXXFLAGS+= -flto # would increase to cppnone=4.08-4.12E6, cppsse4=4.99-5.03E6! 
+else + ###CXXFLAGS+= -flto # also on Intel this would increase throughputs by a factor 2 to 4... + ######CXXFLAGS+= -fno-semantic-interposition # no benefit (neither alone, nor combined with -flto) +endif + +# PowerPC-specific CUDA/HIP compiler flags (to be reviewed!) +ifeq ($(UNAME_P),ppc64le) + GPUFLAGS+= $(XCOMPILERFLAG) -mno-float128 +endif + +#------------------------------------------------------------------------------- + +#=== Configure defaults for OMPFLAGS + +# Set the default OMPFLAGS choice +ifneq ($(findstring hipcc,$(GPUCC)),) + override OMPFLAGS = # disable OpenMP MT when using hipcc #802 +else ifneq ($(shell $(CXX) --version | egrep '^Intel'),) + override OMPFLAGS = -fopenmp + ###override OMPFLAGS = # disable OpenMP MT on Intel (was ok without GPUCC but not ok with GPUCC before #578) +else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) + override OMPFLAGS = -fopenmp + ###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) +###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) +else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) + override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) + ###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) +else + override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms + ###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) +endif + +#------------------------------------------------------------------------------- + +#=== Configure defaults and check if user-defined choices exist for RNDGEN (legacy!), HASCURAND, HASHIPRAND + +# If the legacy RNDGEN exists, this takes precedence over any HASCURAND choice (but a warning is printed out) +###$(info RNDGEN=$(RNDGEN)) +ifneq ($(RNDGEN),) + $(warning Environment variable RNDGEN is no longer supported, please use HASCURAND instead!) + ifeq ($(RNDGEN),hasCurand) + override HASCURAND = $(RNDGEN) + else ifeq ($(RNDGEN),hasNoCurand) + override HASCURAND = $(RNDGEN) + else ifneq ($(RNDGEN),hasNoCurand) + $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported - but use HASCURAND instead!) + endif +endif + +# Set the default HASCURAND (curand random number generator) choice, if no prior choice exists for HASCURAND +# (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) +ifeq ($(HASCURAND),) + ifeq ($(GPUCC),) # CPU-only build + ifneq ($(CUDA_HOME),) + # By default, assume that curand is installed if a CUDA installation exists + override HASCURAND = hasCurand + else + override HASCURAND = hasNoCurand + endif + else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + override HASCURAND = hasCurand + else # non-Nvidia GPU build + override HASCURAND = hasNoCurand + endif +endif + +# Set the default HASHIPRAND (hiprand random number generator) choice, if no prior choice exists for HASHIPRAND +# (NB: allow HASHIPRAND=hasHiprand even if $(GPUCC) does not point to hipcc: assume HIP_HOME was defined correctly...) 
+ifeq ($(HASHIPRAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASHIPRAND = hasNoHiprand + else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build + override HASHIPRAND = hasHiprand + else # non-AMD GPU build + override HASHIPRAND = hasNoHiprand + endif +endif + +#------------------------------------------------------------------------------- + +#=== Set the CUDA/HIP/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD + +# Set the build flags appropriate to OMPFLAGS +$(info OMPFLAGS=$(OMPFLAGS)) +CXXFLAGS += $(OMPFLAGS) + +# Set the build flags appropriate to each BACKEND choice (example: "make BACKEND=cppnone") +# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro] +# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] +ifeq ($(UNAME_P),ppc64le) + ifeq ($(BACKEND),cppsse4) + override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers) + else ifeq ($(BACKEND),cppavx2) + $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) + else ifeq ($(BACKEND),cpp512y) + $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) + else ifeq ($(BACKEND),cpp512z) + $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) + endif +else ifeq ($(UNAME_P),arm) + ifeq ($(BACKEND),cppsse4) + override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) + else ifeq ($(BACKEND),cppavx2) + $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) + else ifeq ($(BACKEND),cpp512y) + $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) + else ifeq ($(BACKEND),cpp512z) + $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) + endif +else 
ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 + ifeq ($(BACKEND),cppnone) + override AVXFLAGS = -mno-sse3 # no SIMD + else ifeq ($(BACKEND),cppsse4) + override AVXFLAGS = -mno-avx # SSE4.2 with 128 width (xmm registers) + else ifeq ($(BACKEND),cppavx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq ($(BACKEND),cpp512y) + override AVXFLAGS = -march=skylake -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(BACKEND),cpp512z) + override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + endif +else + ifeq ($(BACKEND),cppnone) + override AVXFLAGS = -march=x86-64 # no SIMD (see #588) + else ifeq ($(BACKEND),cppsse4) + override AVXFLAGS = -march=nehalem # SSE4.2 with 128 width (xmm registers) + else ifeq ($(BACKEND),cppavx2) + override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] + else ifeq ($(BACKEND),cpp512y) + override AVXFLAGS = -march=skylake-avx512 -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] + else ifeq ($(BACKEND),cpp512z) + override AVXFLAGS = -march=skylake-avx512 -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) + endif +endif +# For the moment, use AVXFLAGS everywhere (in C++ builds): eventually, use them only in encapsulated implementations? 
+ifeq ($(GPUCC),) + CXXFLAGS+= $(AVXFLAGS) +endif + +# Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") +$(info FPTYPE='$(FPTYPE)') +ifeq ($(FPTYPE),d) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE +else ifeq ($(FPTYPE),f) + CXXFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT +else ifeq ($(FPTYPE),m) + CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT +else + $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported) +endif + +# Set the build flags appropriate to each HELINL choice (example: "make HELINL=1") +$(info HELINL='$(HELINL)') +ifeq ($(HELINL),1) + CXXFLAGS += -DMGONGPU_INLINE_HELAMPS + GPUFLAGS += -DMGONGPU_INLINE_HELAMPS +else ifneq ($(HELINL),0) + $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported) +endif + +# Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1") +$(info HRDCOD='$(HRDCOD)') +ifeq ($(HRDCOD),1) + CXXFLAGS += -DMGONGPU_HARDCODE_PARAM + GPUFLAGS += -DMGONGPU_HARDCODE_PARAM +else ifneq ($(HRDCOD),0) + $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) +endif + +#=== Set the CUDA/HIP/C++ compiler and linker flags appropriate to user-defined choices of HASCURAND, HASHIPRAND + +$(info HASCURAND=$(HASCURAND)) +$(info HASHIPRAND=$(HASHIPRAND)) +override RNDCXXFLAGS= +override RNDLIBFLAGS= + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASCURAND choice (example: "make HASCURAND=hasNoCurand") +ifeq ($(HASCURAND),hasNoCurand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_CURAND +else ifeq ($(HASCURAND),hasCurand) + override RNDLIBFLAGS += -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
+else + $(error Unknown HASCURAND='$(HASCURAND)': only 'hasCurand' and 'hasNoCurand' are supported) +endif + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASHIPRAND choice (example: "make HASHIPRAND=hasNoHiprand") +ifeq ($(HASHIPRAND),hasNoHiprand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_HIPRAND +else ifeq ($(HASHIPRAND),hasHiprand) + override RNDLIBFLAGS += -L$(HIP_HOME)/lib/ -lhiprand +else ifneq ($(HASHIPRAND),hasHiprand) + $(error Unknown HASHIPRAND='$(HASHIPRAND)': only 'hasHiprand' and 'hasNoHiprand' are supported) +endif + +#$(info RNDCXXFLAGS=$(RNDCXXFLAGS)) +#$(info RNDLIBFLAGS=$(RNDLIBFLAGS)) + +#------------------------------------------------------------------------------- + +#=== Configure Position-Independent Code +CXXFLAGS += -fPIC +GPUFLAGS += $(XCOMPILERFLAG) -fPIC + +#------------------------------------------------------------------------------- + +#=== Configure build directories and build lockfiles === + +# Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) +# (Rationale: avoid mixing of builds with different random number generators) +override TAG = $(patsubst cpp%%,%%,$(BACKEND))_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASHIPRAND) + +# Export TAG (so that there is no need to check/define it again in cudacpp_src.mk) +export TAG + +# Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 +override BUILDDIR = $(CUDACPP_BUILDDIR) +ifeq ($(USEBUILDDIR),1) + override LIBDIR = ../../lib/$(BUILDDIR) + override LIBDIRRPATH = '$$ORIGIN/../$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR == 1)) +else + override LIBDIR = ../../lib + override LIBDIRRPATH = '$$ORIGIN/$(LIBDIR)' + $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR != 1)) +endif +###override INCDIR = ../../include +###$(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG)) + +# On Linux, set rpath to LIBDIR to make it unnecessary 
to use LD_LIBRARY_PATH +# Use relative paths with respect to the executables or shared libraries ($ORIGIN on Linux) +# On Darwin, building libraries with absolute paths in LIBDIR makes this unnecessary +ifeq ($(UNAME_S),Darwin) + override CXXLIBFLAGSRPATH = + override GPULIBFLAGSRPATH = + override CXXLIBFLAGSRPATH2 = + override GPULIBFLAGSRPATH2 = +else + # RPATH to gpu/cpp libs when linking executables + override CXXLIBFLAGSRPATH = -Wl,-rpath=$(LIBDIRRPATH) + override GPULIBFLAGSRPATH = -Xlinker -rpath=$(LIBDIRRPATH) + # RPATH to common lib when linking gpu/cpp libs + override CXXLIBFLAGSRPATH2 = -Wl,-rpath='$$ORIGIN' + override GPULIBFLAGSRPATH2 = -Xlinker -rpath='$$ORIGIN' +endif + +# Setting LD_LIBRARY_PATH or DYLD_LIBRARY_PATH in the RUNTIME is no longer necessary (neither on Linux nor on Mac) +override RUNTIME = + +#=============================================================================== +#=== Makefile TARGETS and build rules below +#=============================================================================== + + +ifeq ($(GPUCC),) + cxx_checkmain=$(BUILDDIR)/check_cpp.exe + cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe + cxx_rwgtlib=$(BUILDDIR)/librwgt_cpp.so + cxx_testmain=$(BUILDDIR)/runTest_cpp.exe +else + gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe + gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe + gpu_rwgtlib=$(BUILDDIR)/librwgt_$(GPUSUFFIX).so + gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif + +# Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) +.DEFAULT_GOAL := all.$(TAG) + +# First target (default goal) +ifeq ($(GPUCC),) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(cxx_rwgtlib) $(if $(GTESTLIBS),$(cxx_testmain)) +else +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(gpu_rwgtlib) $(if $(GTESTLIBS),$(gpu_testmain)) +endif + +# 
Target (and build options): debug +MAKEDEBUG= +debug: OPTFLAGS = -g -O0 +debug: CUDA_OPTFLAGS = -G +debug: MAKEDEBUG := debug +debug: all.$(TAG) + +# Target: tag-specific build lockfiles +override oldtagsb=`if [ -d $(BUILDDIR) ]; then find $(BUILDDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)' -exec echo $(shell pwd)/{} \; ; fi` +$(BUILDDIR)/.build.$(TAG): + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi + @touch $(BUILDDIR)/.build.$(TAG) + +# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) +# Added edgecase for HIP compilation +ifeq ($(shell $(CXX) --version | grep ^nvc++),) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) +$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math +$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math +endif + +# Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +$(BUILDDIR)/rwgt_runner_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +$(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +$(BUILDDIR)/rwgt_runner_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) + +# Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o +$(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/rwgt_runner_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/rwgt_runner_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) +ifeq ($(HASCURAND),hasCurand) # curand headers, #679 +$(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) +endif +ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers +$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) +endif + +# Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... instead" in GPUCC with icx2023 (#592) +ifneq ($(shell $(CXX) --version | egrep '^(Intel)'),) +ifneq ($(GPUCC),) +GPUFLAGS += -Wno-deprecated-builtins +endif +endif + +# Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) +# This patch does remove the warning, but I prefer to keep it disabled for the moment... +###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) +###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option +###ifneq ($(GPUCC),) +###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option +###endif +###endif + +#### Apply special build flags only to CPPProcess.o (-flto) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto + +#### Apply special build flags only to CPPProcess.o (AVXFLAGS) +###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) + +# Generic target and build rules: objects from C++ compilation +# (NB do not include CUDA_INC here! add it only for NVTX or curand #679) +$(BUILDDIR)/%%_cpp.o : %%.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(CXX) $(CPPFLAGS) $(INCFLAGS) $(CXXFLAGS) -c $< -o $@ + +# Generic target and build rules: objects from CUDA or HIP compilation +ifneq ($(GPUCC),) +$(BUILDDIR)/%%_$(GPUSUFFIX).o : %%.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(GPUCC) $(CPPFLAGS) $(INCFLAGS) $(GPUFLAGS) -c -x $(GPULANGUAGE) $< -o $@ +endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): common (src) library +commonlib : $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so + +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc $(BUILDDIR)/.build.$(TAG) + $(MAKE) -C ../../src $(MAKEDEBUG) -f $(CUDACPP_SRC_MAKEFILE) + +#------------------------------------------------------------------------------- + +processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') +###$(info processid_short=$(processid_short)) + +MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp +cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o +cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o + +ifneq ($(GPUCC),) +MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) +gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementKernels_$(GPUSUFFIX).o $(BUILDDIR)/BridgeKernels_$(GPUSUFFIX).o $(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o +gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o +endif + +# Target (and build rules): C++ and CUDA/HIP shared libraries +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o +$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: 
$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) + $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) + +ifneq ($(GPUCC),) +$(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o +$(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += $(BUILDDIR)/fbridge_$(GPUSUFFIX).o +$(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) + $(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +# Bypass std::filesystem completely to ease portability on LUMI #803 +#ifneq ($(findstring hipcc,$(GPUCC)),) +# $(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs +#else +# $(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) +#endif +endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): Fortran include files +###$(INCDIR)/%%.inc : ../%%.inc +### @if [ ! 
-d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi +### \cp $< $@ + +#------------------------------------------------------------------------------- + +# Target (and build rules): C++ and CUDA/HIP standalone executables +$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) + +# Target (and build rules): C++ rwgt libraries +cxx_rwgtfiles := $(BUILDDIR)/rwgt_runner_cpp.o $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(cxx_objects_exe) +$(cxx_rwgtlib): LIBFLAGS += $(CXXLIBFLAGSRPATH) +$(cxx_rwgtlib): $(BUILDDIR)/rwgt_runner_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o + $(CXX) -shared -o $@ $(BUILDDIR)/rwgt_runner_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) + +ifneq ($(GPUCC),) +ifneq ($(shell $(CXX) --version | grep ^Intel),) +$(gpu_checkmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_checkmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 +$(gpu_checkmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +endif +$(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the 
need for LD_LIBRARY_PATH +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o + $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) +gpu_rwgtfiles := $(BUILDDIR)/rwgt_runner.o $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(gpu_objects_exe) +$(gpu_rwgtlib): $(gpu_rwgtfiles) $(gpu_objects_lib) + $(GPUCC) -shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_GPULIB) +endif + +#------------------------------------------------------------------------------- + +# Generic target and build rules: objects from Fortran compilation +$(BUILDDIR)/%%_fortran.o : %%.f *.inc + @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + $(FC) -I. -c $< -o $@ + +# Generic target and build rules: objects from Fortran compilation +###$(BUILDDIR)/%%_fortran.o : %%.f *.inc +### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi +### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi +### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ + +# Target (and build rules): Fortran standalone executables +###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc + +ifeq ($(UNAME_S),Darwin) +$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +endif +$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) +ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ +else + $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) +endif + +ifneq ($(GPUCC),) +ifneq ($(shell $(CXX) --version | grep ^Intel),) +$(gpu_fcheckmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +endif +ifeq ($(UNAME_S),Darwin) +$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 +endif +$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) +ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) 
-print-prog-name=clang))/../../lib -lamdhip64 +else + $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) +endif +endif + +#------------------------------------------------------------------------------- + +# Target (and build rules): test objects and test executable +ifeq ($(GPUCC),) +$(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt +$(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions +else +$(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) +$(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) +$(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt +$(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions +endif + +ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) +ifeq ($(GPUCC),) +$(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests +else +$(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) +$(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) +$(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests +endif +endif + +ifeq ($(GPUCC),) +$(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) +$(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) +$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o +$(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o +else 
+$(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) +$(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) +ifneq ($(shell $(CXX) --version | grep ^Intel),) +$(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 +$(gpu_testmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +endif +$(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o +$(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o +endif + +ifeq ($(GPUCC),) +$(cxx_testmain): $(GTESTLIBS) +$(cxx_testmain): INCFLAGS += $(GTESTINC) +$(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +else +$(gpu_testmain): $(GTESTLIBS) +$(gpu_testmain): INCFLAGS += $(GTESTINC) +$(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc +endif + +ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) +ifneq ($(OMPFLAGS),) +ifneq ($(shell $(CXX) --version | egrep '^Intel'),) +$(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) +else ifneq ($(shell $(CXX) --version | egrep '^clang'),) +$(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 +###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) +else +$(cxx_testmain): LIBFLAGS += -lgomp +endif +endif +endif + +# Test quadmath in testmisc.cc tests for constexpr_math #627 +###ifeq ($(GPUCC),) +###$(cxx_testmain): LIBFLAGS += -lquadmath +###else +###$(gpu_testmain): LIBFLAGS += -lquadmath +###endif + +# Bypass std::filesystem completely to ease portability on LUMI #803 +###ifneq ($(findstring hipcc,$(GPUCC)),) +###$(gpu_testmain): LIBFLAGS += -lstdc++fs +###endif + +ifeq ($(GPUCC),) # link only runTest_cpp.o +$(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) + $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) +else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) +$(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +$(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) +ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 +else + $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda +endif +endif + +# Use target gtestlibs to build only googletest +ifneq ($(GTESTLIBS),) +gtestlibs: $(GTESTLIBS) +endif + +# Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 +$(GTESTLIBS): +ifneq ($(shell which flock 2>/dev/null),) + @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi + flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) +else + if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi +endif + +#------------------------------------------------------------------------------- + +# Target: build all targets in all BACKEND modes (each BACKEND mode in a separate build directory) +# Split the bldall target into separate targets to allow parallel 'make -j bldall' builds +# (Obsolete hack, no longer needed as there is no INCDIR: add a fbridge.inc dependency to bldall, to ensure it is only copied once for all BACKEND modes) +bldcuda: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=cuda -f $(CUDACPP_MAKEFILE) + +bldhip: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=hip -f $(CUDACPP_MAKEFILE) + +bldnone: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=cppnone -f $(CUDACPP_MAKEFILE) + +bldsse4: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=cppsse4 -f $(CUDACPP_MAKEFILE) + +bldavx2: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=cppavx2 -f $(CUDACPP_MAKEFILE) + +bld512y: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=cpp512y -f $(CUDACPP_MAKEFILE) + +bld512z: + @echo + $(MAKE) USEBUILDDIR=1 BACKEND=cpp512z -f $(CUDACPP_MAKEFILE) + +ifeq ($(UNAME_P),ppc64le) +###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 +bldavxs: bldnone bldsse4 +else ifeq ($(UNAME_P),arm) +###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 +bldavxs: bldnone bldsse4 +else +###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4 bldavx2 bld512y bld512z +bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z +endif + +ifneq ($(HIP_HOME),) +ifneq ($(CUDA_HOME),) +bldall: bldhip bldcuda bldavxs +else +bldall: bldhip bldavxs +endif +else +ifneq ($(CUDA_HOME),) +bldall: bldcuda bldavxs +else +bldall: bldavxs +endif +endif + +#------------------------------------------------------------------------------- + +# Target: clean the builds +.PHONY: clean + +clean: +ifeq ($(USEBUILDDIR),1) + rm -rf $(BUILDDIR) +else + rm -f $(BUILDDIR)/.build.* $(BUILDDIR)/*.o 
$(BUILDDIR)/*.exe + rm -f $(LIBDIR)/lib*.so +endif + $(MAKE) -C ../../src clean -f $(CUDACPP_SRC_MAKEFILE) +### rm -rf $(INCDIR) + +cleanall: + @echo + $(MAKE) USEBUILDDIR=0 clean -f $(CUDACPP_MAKEFILE) + @echo + $(MAKE) USEBUILDDIR=0 -C ../../src cleanall -f $(CUDACPP_SRC_MAKEFILE) + rm -rf build.* + +# Target: clean the builds as well as the gtest installation(s) +distclean: cleanall +ifneq ($(wildcard $(TESTDIRCOMMON)),) + $(MAKE) -C $(TESTDIRCOMMON) clean +endif + $(MAKE) -C $(TESTDIRLOCAL) clean + +#------------------------------------------------------------------------------- + +# Target: show system and compiler information +info: + @echo "" + @uname -spn # e.g. Linux nodename.cern.ch x86_64 +ifeq ($(UNAME_S),Darwin) + @sysctl -a | grep -i brand + @sysctl -a | grep machdep.cpu | grep features || true + @sysctl -a | grep hw.physicalcpu: + @sysctl -a | grep hw.logicalcpu: +else + @cat /proc/cpuinfo | grep "model name" | sort -u + @cat /proc/cpuinfo | grep "flags" | sort -u + @cat /proc/cpuinfo | grep "cpu cores" | sort -u + @cat /proc/cpuinfo | grep "physical id" | sort -u +endif + @echo "" +ifneq ($(shell which nvidia-smi 2>/dev/null),) + nvidia-smi -L + @echo "" +endif + @echo USECCACHE=$(USECCACHE) +ifeq ($(USECCACHE),1) + ccache --version | head -1 +endif + @echo "" + @echo GPUCC=$(GPUCC) +ifneq ($(GPUCC),) + $(GPUCC) --version +endif + @echo "" + @echo CXX=$(CXX) +ifneq ($(shell $(CXX) --version | grep ^clang),) + @echo $(CXX) -v + @$(CXX) -v |& egrep -v '(Found|multilib)' + @readelf -p .comment `$(CXX) -print-libgcc-file-name` |& grep 'GCC: (GNU)' | grep -v Warning | sort -u | awk '{print "GCC toolchain:",$$5}' +else + $(CXX) --version +endif + @echo "" + @echo FC=$(FC) + $(FC) --version + +#------------------------------------------------------------------------------- + +# Target: 'make test' (execute runTest.exe, and compare check.exe with fcheck.exe) +# [NB: THIS IS WHAT IS TESTED IN THE GITHUB CI!] 
+# [NB: This used to be called 'make check' but the name has been changed as this has nothing to do with 'check.exe'] +test: runTest cmpFcheck + +# Target: runTest (run the C++ or CUDA/HIP test executable runTest.exe) +runTest: all.$(TAG) +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/runTest_cpp.exe +else + $(RUNTIME) $(BUILDDIR)/runTest_$(GPUSUFFIX).exe +endif + +# Target: runCheck (run the C++ or CUDA/HIP standalone executable check.exe, with a small number of events) +runCheck: all.$(TAG) +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/check_cpp.exe -p 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 +endif + +# Target: runFcheck (run the Fortran standalone executable - with C++ or CUDA/HIP MEs - fcheck.exe, with a small number of events) +runFcheck: all.$(TAG) +ifeq ($(GPUCC),) + $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 +else + $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 +endif + +# Target: cmpFcheck (compare ME results from the C++/CUDA/HIP and Fortran with C++/CUDA/HIP MEs standalone executables, with a small number of events) +cmpFcheck: all.$(TAG) + @echo +ifeq ($(GPUCC),) + @echo "$(BUILDDIR)/check_cpp.exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_cpp.exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_cpp.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_cpp.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +else + @echo "$(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2" + @echo "$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2" + @me1=$(shell $(RUNTIME) $(BUILDDIR)/check_$(GPUSUFFIX).exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck_$(GPUSUFFIX).exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/GPU) = $${me1}"; echo "Avg ME (F77/GPU) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/GPU) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi +endif + +# Target: cuda-memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) +cuda-memcheck: all.$(TAG) + $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/check_$(GPUSUFFIX).exe -p 2 32 2 + +#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk index 897de8caa8..dedc398029 100644 --- 
a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk @@ -144,6 +144,9 @@ endif #------------------------------------------------------------------------------- cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) +#cxx_objects+=$(addprefix $(BUILDDIR)/, REX_cpp.o) # ZW: not all functionality from REX needed for teawREX is in the header, so for now just include REX.cc in teawREX.cc +cxx_objects+=$(addprefix $(BUILDDIR)/, teawREX_cpp.o) +cxx_objects+=$(addprefix $(BUILDDIR)/, rwgt_instance_cpp.o) ifeq ($(GPUCC),) cxx_objects+=$(addprefix $(BUILDDIR)/, Parameters_%(model)s_cpp.o) else diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 44b054d9b6..2287a58b84 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -2112,16 +2112,37 @@ def get_rwgt_legs(self, process): """Return string with particle ids and status in the REX std::pair format""" return ",".join(["{\"%i\",\"%i\"}" % (leg.get('state'), leg.get('id')) \ for leg in process.get('legs')]).replace('0', '-1') + + def get_rwgt_legs_vec(self, processes): + """Return string with vectors of particle ids and statuses""" + prtSets = [] + for k in range(len(processes)): + prtSets.append("{" + self.get_rwgt_legs(processes[k]) + "}") + return ",".join(prtSets) def get_init_prts_vec(self, process): """Return string with initial state particle ids for use in REX event sorting""" prts = ",".join(["\"%i\"" % leg.get('id') for leg in process.get('legs') if leg.get('state') == 0]) return "{" + prts + "}" + def get_init_prts_vecs(self, processes): + """Return string with vectors of initial state particle ids""" + prtSets = [] + for k in range(len(processes)): + 
prtSets.append(self.get_init_prts_vec(processes[k])) + return ",".join(prtSets) + def get_fin_prts_vec(self, process): """Return string with final state particle ids for use in REX event sorting""" prts = ",".join(["\"%i\"" % leg.get('id') for leg in process.get('legs') if leg.get('state') == 1]) return "{" + prts + "}" + + def get_fin_prts_vecs(self, processes): + """Return string with vectors of final state particle ids""" + prtSets = [] + for k in range(len(processes)): + prtSets.append(self.get_fin_prts_vec(processes[k])) + return ",".join(prtSets) def get_rwgt_procMap(self, process): """Return string with particle states and order in the REX procMap format""" @@ -2163,6 +2184,16 @@ def write_rwgt_header(self): with open(os.path.join(self.path, 'rwgt_runner.h'), 'w') as ff: ff.write(rwgt_h) + def edit_rwgt_header(self): + """Adds process-specific details to the rwgt_runner.h template""" + replace_dict = super().get_process_class_definitions(write=False) + replace_dict['process_namespace'] = self.get_proc_dir() + replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() + template = open(pjoin(self.template_path,'REX', 'rwgt_runner.h'),'r').read() + ff = open(pjoin(self.path, 'rwgt_runner.h'),'w') + ff.write(template % replace_dict) + ff.close() + def edit_rwgt_runner(self): """Create the rwgt_runner.cc file for the REX reweighting""" ###misc.sprint('Entering PLUGIN_OneProcessExporterRwgt.edit_rwgt_runner') @@ -2172,11 +2203,10 @@ def edit_rwgt_runner(self): # rwgt_runner = self.get_proc_dir() + self.rwgt_template replace_dict['process_namespace'] = self.get_proc_dir() replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() - replace_dict['init_prt_ids'] = self.get_init_prts_vec(self.matrix_elements[0].get('processes')[0]) - replace_dict['fin_prt_ids'] = self.get_fin_prts_vec(self.matrix_elements[0].get('processes')[0]) - replace_dict['process_event'] = self.get_rwgt_legs(self.matrix_elements[0].get('processes')[0]) + 
replace_dict['init_prt_ids'] = self.get_init_prts_vecs(self.matrix_elements[0].get('processes')) + replace_dict['fin_prt_ids'] = self.get_fin_prts_vecs(self.matrix_elements[0].get('processes')) + replace_dict['process_events'] = self.get_rwgt_legs_vec(self.matrix_elements[0].get('processes')) template = open(pjoin(self.template_path,'REX', 'rwgt_runner.inc'),'r').read() - self.write_rwgt_header() ff = open(pjoin(self.path, 'rwgt_runner.cc'),'w') ff.write(template % replace_dict) ff.close() @@ -2188,7 +2218,8 @@ def generate_process_files(self): """Generate mgOnGpuConfig.h, CPPProcess.cc, CPPProcess.h, check_sa.cc, gXXX.cu links""" misc.sprint('Entering RWGT_OneProcessExporter.generate_process_files') super().generate_process_files() - misc.sprint('Generating rwgt_runner file') + misc.sprint('Generating rwgt_runner files') + self.edit_rwgt_header() self.edit_rwgt_runner() misc.sprint('Finished generating rwgt files') diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index cce954413b..da534086d9 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -347,17 +347,18 @@ class RWGT_ProcessExporter(PLUGIN_ProcessExporter): rwgt_names = [] proc_lines = [] - s = PLUGINDIR + '/madgraph/iolibs/template_files/' from_template = {'.': [s+'.clang-format', s+'CMake/CMakeLists.txt', s+'COPYRIGHT', s+'COPYING', s+'COPYING.LESSER' ], 'CMake': [s+'CMake/Compilers.txt', s+'CMake/Platforms.txt', s+'CMake/Macros.txt'], 'src': [s+'gpu/rambo.h', s+'read_slha.h', s+'read_slha.cc', s+'gpu/mgOnGpuFptypes.h', s+'gpu/mgOnGpuCxtypes.h', s+'gpu/mgOnGpuVectors.h', + s+'gpu/constexpr_math.h', + s+'gpu/cudacpp_config.mk', s+'CMake/src/CMakeLists.txt', s+'REX/REX.cc', s+'REX/teawREX.cc', s+'REX/REX.h', s+'REX/teawREX.h', - s+'REX/rwgt_instance.h', s+'REX/rwgt_instance.cc'], + s+'REX/rwgt_instance.h', s+'REX/rwgt_instance.cc' ], 
'SubProcesses': [s+'gpu/nvtx.h', s+'gpu/timer.h', s+'gpu/timermap.h', s+'gpu/ompnumthreads.h', s+'gpu/GpuRuntime.h', s+'gpu/GpuAbstraction.h', s+'gpu/MemoryAccessHelpers.h', s+'gpu/MemoryAccessVectors.h', @@ -378,15 +379,10 @@ class RWGT_ProcessExporter(PLUGIN_ProcessExporter): s+'gpu/testmisc.cc', s+'gpu/testxxx_cc_ref.txt', s+'gpu/perf.py', s+'gpu/profile.sh', s+'CMake/SubProcesses/CMakeLists.txt', - s+'gpu/cudacpp_rex_driver.mk', + s+'gpu/cudacpp_driver.mk', s+'REX/rwgt_instance.h', s+'REX/REX.h', s+'REX/teawREX.h'], 'test': [s+'gpu/cudacpp_test.mk']} -# from_template['SubProcesses'].append(s+'REX/rwgt_instance.h') -# from_template['SubProcesses'].append(s+'REX/REX.hpp') -# from_template['SubProcesses'].append(s+'REX/teawREX.hpp') -# from_template['SubProcesses'].append(s+'gpu/cudacpp_rex_driver.mk') - to_link_in_P = ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', @@ -412,11 +408,77 @@ class RWGT_ProcessExporter(PLUGIN_ProcessExporter): 'perf.py', 'profile.sh', 'rwgt_instance.h', 'REX.h', 'teawREX.h'] +# s = PLUGINDIR + '/madgraph/iolibs/template_files/' +# from_template = {'.': [s+'.clang-format', s+'CMake/CMakeLists.txt', +# s+'COPYRIGHT', s+'COPYING', s+'COPYING.LESSER' ], +# 'CMake': [s+'CMake/Compilers.txt', s+'CMake/Platforms.txt', s+'CMake/Macros.txt'], +# 'src': [s+'gpu/rambo.h', s+'read_slha.h', s+'read_slha.cc', +# s+'gpu/mgOnGpuFptypes.h', s+'gpu/mgOnGpuCxtypes.h', s+'gpu/mgOnGpuVectors.h', +# s+'CMake/src/CMakeLists.txt', +# s+'REX/REX.cc', s+'REX/teawREX.cc', +# s+'REX/REX.h', s+'REX/teawREX.h', +# s+'REX/rwgt_instance.h', s+'REX/rwgt_instance.cc'], +# 'SubProcesses': [s+'gpu/nvtx.h', s+'gpu/timer.h', s+'gpu/timermap.h', +# s+'gpu/ompnumthreads.h', s+'gpu/GpuRuntime.h', s+'gpu/GpuAbstraction.h', +# s+'gpu/MemoryAccessHelpers.h', s+'gpu/MemoryAccessVectors.h', +# s+'gpu/MemoryAccessMatrixElements.h', s+'gpu/MemoryAccessMomenta.h', +# 
s+'gpu/MemoryAccessRandomNumbers.h', s+'gpu/MemoryAccessWeights.h', +# s+'gpu/MemoryAccessAmplitudes.h', s+'gpu/MemoryAccessWavefunctions.h', +# s+'gpu/MemoryAccessGs.h', s+'gpu/MemoryAccessCouplingsFixed.h', +# s+'gpu/MemoryAccessNumerators.h', s+'gpu/MemoryAccessDenominators.h', +# s+'gpu/EventStatistics.h', s+'gpu/CommonRandomNumbers.h', +# s+'gpu/CrossSectionKernels.cc', s+'gpu/CrossSectionKernels.h', +# s+'gpu/MatrixElementKernels.cc', s+'gpu/MatrixElementKernels.h', +# s+'gpu/RamboSamplingKernels.cc', s+'gpu/RamboSamplingKernels.h', +# s+'gpu/RandomNumberKernels.h', s+'gpu/CommonRandomNumberKernel.cc', +# s+'gpu/CurandRandomNumberKernel.cc', s+'gpu/HiprandRandomNumberKernel.cc', +# s+'gpu/Bridge.h', s+'gpu/BridgeKernels.cc', s+'gpu/BridgeKernels.h', +# s+'gpu/fbridge.cc', s+'gpu/fbridge.inc', s+'gpu/fsampler.cc', s+'gpu/fsampler.inc', +# s+'gpu/MadgraphTest.h', s+'gpu/runTest.cc', +# s+'gpu/testmisc.cc', s+'gpu/testxxx_cc_ref.txt', +# s+'gpu/perf.py', s+'gpu/profile.sh', +# s+'CMake/SubProcesses/CMakeLists.txt', +# s+'gpu/cudacpp_rex_driver.mk', +# s+'REX/rwgt_instance.h', s+'REX/REX.h', s+'REX/teawREX.h'], +# 'test': [s+'gpu/cudacpp_test.mk']} + +# # from_template['SubProcesses'].append(s+'REX/rwgt_instance.h') +# # from_template['SubProcesses'].append(s+'REX/REX.hpp') +# # from_template['SubProcesses'].append(s+'REX/teawREX.hpp') +# # from_template['SubProcesses'].append(s+'gpu/cudacpp_rex_driver.mk') + +# to_link_in_P = ['nvtx.h', 'timer.h', 'timermap.h', +# 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', +# 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', +# 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', +# 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', +# 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', +# 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', +# 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', +# 'EventStatistics.h', 'CommonRandomNumbers.h', +# 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 
+# 'MatrixElementKernels.cc', 'MatrixElementKernels.h', +# 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', +# 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', +# 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', +# 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', +# 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', +# 'MadgraphTest.h', 'runTest.cc', +# 'testmisc.cc', 'testxxx_cc_ref.txt', +# 'cudacpp.mk', # this is generated from a template in Subprocesses but we still link it in P1 +# 'testxxx.cc', # this is generated from a template in Subprocesses but we still link it in P1 +# 'MemoryBuffers.h', # this is generated from a template in Subprocesses but we still link it in P1 +# 'MemoryAccessCouplings.h', # this is generated from a template in Subprocesses but we still link it in P1 +# 'perf.py', 'profile.sh', +# 'rwgt_instance.h', 'REX.h', 'teawREX.h'] + # to_link_in_P.append('rwgt_instance.h') # to_link_in_P.append('REX.hpp') # to_link_in_P.append('teawREX.hpp') - template_Sub_make = pjoin(PLUGINDIR, 'madgraph', 'iolibs', 'template_files','gpu','cudacpp_rex_runner.mk') + template_src_make = pjoin(PLUGINDIR, 'madgraph' ,'iolibs', 'template_files','gpu','cudacpp_src.mk') + template_tst_make = pjoin(PLUGINDIR, 'madgraph', 'iolibs', 'template_files','gpu','cudacpp_test.mk') + template_Sub_make = pjoin(PLUGINDIR, 'madgraph', 'iolibs', 'template_files','gpu','cudacpp_runner.mk') # def generate_subprocess_directory(self, subproc_group, fortran_model, me=None): # misc.sprint('Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory)') @@ -465,10 +527,13 @@ def export_driver(self): replace_dict['multiprocess_lines'] = "\n".join(self.proc_lines) replace_dict['include_lines'] = '' replace_dict['run_set'] = '' + replace_dict['fbridge_vec'] = '' for name in self.rwgt_names: replace_dict['include_lines'] += '#include "%s/rwgt_runner.h"\n' % name - replace_dict['run_set'] += '%s::runner,' % name + 
replace_dict['run_set'] += '%s::getEventSet(),' % name + replace_dict['fbridge_vec'] += '%s::bridgeConstr(),' % name replace_dict['run_set'] = replace_dict['run_set'][:-1] + replace_dict['fbridge_vec'] = replace_dict['fbridge_vec'][:-1] template_path = os.path.join( PLUGINDIR, 'madgraph', 'iolibs', 'template_files' ) template = open(pjoin(template_path,'REX', 'rwgt_driver.inc'),'r').read() ff = open(pjoin(self.dir_path, 'SubProcesses', 'rwgt_driver.cc'),'w') From 77488e37e48a1a4fa2c97342b2f321439467bed7 Mon Sep 17 00:00:00 2001 From: Zenny Jovi Joestar Wettersten Date: Wed, 31 Jul 2024 11:54:29 +0200 Subject: [PATCH 18/76] added generic rwgt_runner header, modified runners and drivers to put all objects in driver's local memory, and modified REX and teawREX to account for these changes --- tools/REX/REX.cc | 3823 ++++++++++++++++++++++++++++++++- tools/REX/REX.h | 59 +- tools/REX/REX.hpp | 15 +- tools/REX/rwgt_driver.cc | 34 +- tools/REX/rwgt_instance.cc | 191 +- tools/REX/rwgt_instance.h | 40 +- tools/REX/rwgt_runner.cc | 156 +- tools/REX/rwgt_runner.h | 34 + tools/REX/rwgt_runner_copy.cc | 197 ++ tools/REX/teawREX.cc | 612 +++++- tools/REX/teawREX.h | 24 +- tools/REX/teawREX.hpp | 41 +- 12 files changed, 5031 insertions(+), 195 deletions(-) mode change 120000 => 100644 tools/REX/REX.cc create mode 100644 tools/REX/rwgt_runner.h create mode 100644 tools/REX/rwgt_runner_copy.cc mode change 120000 => 100644 tools/REX/teawREX.cc diff --git a/tools/REX/REX.cc b/tools/REX/REX.cc deleted file mode 120000 index 65f267f382..0000000000 --- a/tools/REX/REX.cc +++ /dev/null @@ -1 +0,0 @@ -REX.hpp \ No newline at end of file diff --git a/tools/REX/REX.cc b/tools/REX/REX.cc new file mode 100644 index 0000000000..b321052467 --- /dev/null +++ b/tools/REX/REX.cc @@ -0,0 +1,3822 @@ +/*** + * ______ _______ __ + * | ___ \ ___\ \ / / + * | |_/ / |__ \ V / + * | /| __| / \ + * | |\ \| |___/ /^\ \ + * \_| \_\____/\/ \/ + * + ***/ + +// THIS IS NOT A LICENSED RELEASE +// IF YOU SEE 
THIS FILE, IT HAS BEEN SPREAD +// FROM AN IMPROPER RELEASE. + +// Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. +// All rights reserved. + +#ifndef _REX_CC_ +#define _REX_CC_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "REX.h" +#include + +// ZW: all fcns within the REX standard sit in the +// namespace REX +// Note that as a convention, std::string_view objects will be +// referred to as strings unless the difference is relevant +namespace REX +{ + +// using sortFcn = std::function>(std::vector)>; +// using statSort = std::function>(std::string_view, std::vector)>; + + // ZW: index sorting function, which returs vector + // of the indices of the original vector sorted + // by default in ascending order + // ie, for [5.0, 0.25, 2.0, 9.2] returns [1, 2, 0, 3] + template + std::shared_ptr> indSort(const std::vector &vector, std::function comp = std::less()) + { + auto sorted = std::make_shared>(vector.size()); + std::iota(sorted->begin(), sorted->end(), 0); + std::stable_sort(sorted->begin(), sorted->end(), [&](size_t i, size_t j) { return comp(vector[i], vector[j]); }); + return sorted; + } + + // ZW: wrapper for indSort for comparing string-type arguments representing integers + template + std::shared_ptr> stoiSort(const std::vector &vector) + { + std::function stoicomp = [](const T& i, const T& j) { + return std::stoi(std::string(i)) < std::stoi(std::string(j)); }; + return indSort(vector, stoicomp); + } + template std::shared_ptr> stoiSort(const std::vector &vector); + + // ZW: wrapper for indSort for comparing string-type arguments representing doubles + template + std::shared_ptr> stodSort(const std::vector &vector) + { + std::function stodcomp = [](const T& i, const T& j) { return std::stod(std::string(i)) < std::stod(std::string(j)); }; + return indSort(vector, stodcomp); + } + + // ZW: templated fcn for finding 
the order of elements in a vector to_sort + // based on their order in a reference vector reference + // Elements not found in reference are represented by npos, + // including if to_sort is longer than reference + template + std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort) { + std::unordered_map> indexMap; + + // Populate indexMap with indices from vec1 + for (size_t i = 0; i < reference.size(); ++i) { + indexMap[reference[i]].push(i); + } + + auto order = std::make_shared>(std::vector(to_sort.size(), npos)); + //order->reserve(to_sort.size()); // Pre-allocate memory + size_t pos = 0; + for (const auto& elem : to_sort) { + auto it = indexMap.find(elem); + if (it != indexMap.end() && !it->second.empty()) { + order->at(pos) = (it->second.front()); + it->second.pop(); + } //else { + // Element in vec2 not found in vec1 + // order->at(pos) = npos; + //} + ++pos; + } + + return order; + } + template std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort); + + // ZW: minimal fcn for counting the amount of times + // a given search term appears in a string + int nuStrCount( std::string_view searchString, std::string_view searchTerm ) + { + int count = 0; + size_t pos = 0; + while((pos = searchString.find(searchTerm, pos)) != npos ){ + ++count; + ++pos; + } + return count; + } + + // ZW: fcn for finding the location of each + // entry of seachTerm in the given string textFile + // Pre-allocates vector memory using nuStrCount + std::shared_ptr> nuFindEach( std::string_view textFile, std::string_view searchTerm ) + { + auto eachPos = std::make_shared>(); + eachPos->reserve( nuStrCount(textFile, searchTerm) ); + eachPos->push_back( textFile.find( searchTerm ) ); + size_t currPos = textFile.find( searchTerm, eachPos->at(0) + 1 ); + while( currPos != npos ) + { + eachPos->push_back( currPos ); + currPos = textFile.find( searchTerm, currPos + 1 ); + } + return eachPos; + } + + // ZW: fcn for splitting a string 
into a vector of strings, + // each element differentiated by linebreaks in the original string + // Removes sequential linebreaks, ie "\n\n\n" would + // only result in a single element separation + std::shared_ptr> nuLineSplitter( std::string_view currEvt ) + { + auto lineBreaks = nuFindEach( currEvt, "\n" ); + std::vector trueBreaks; + trueBreaks.reserve( lineBreaks->size() ); + for( size_t k = 0 ; k < lineBreaks->size() - 1 ; ++k ) + { + if( int( (*lineBreaks)[k+1] - (*lineBreaks)[k]) == 1){continue;} + trueBreaks.push_back( (*lineBreaks)[k] ); + } + auto splitLines = std::make_shared>(); + splitLines->reserve( trueBreaks.size() ); + size_t startPos = 0; + for( auto k : trueBreaks ) + { + splitLines->push_back( currEvt.substr( startPos + 1, k - startPos - 1) ); + startPos = k; + } + if( currEvt.substr( startPos ).size() > 1 ){ splitLines->push_back( currEvt.substr( startPos ) ); } + return splitLines; + } + + // ZW: fcn for finding each linebreak in a string, + // returning a vector of the positions of "\n" characters + // Ignores sequential linebreaks, ie would only return { } + // for the string "\n\n\n\n" + std::shared_ptr> lineFinder( std::string_view currEvt, size_t startPos = 0, size_t endPos = npos ) + { + auto lineBreaks = nuFindEach( currEvt.substr( startPos, endPos - startPos), "\n" ); + auto truBreaks = std::make_shared>(); + truBreaks->reserve( lineBreaks->size() ); + for( size_t k = 0 ; k < lineBreaks->size() ; ++k ) + { + if( int( (*lineBreaks)[k+1] - (*lineBreaks)[k]) == 1){continue;} + truBreaks->push_back( (*lineBreaks)[k] ); + } + return truBreaks; + } + + // ZW: fcn for splitting a string into a vector of strings, + // each element separated by blankspace (" ") in the original string + // Ignores sequential blankspaces, as well as linebreaks + // ie "hello \n\n\n world" would return {"hello", "world"} + // Does not ignore linebreaks that are not separated from words + // by anything other than blankspace, + // ie "hello \n\n\nworld \n\n" 
would return {"hello", "\n\nworld"} + std::shared_ptr> nuWordSplitter( std::string_view currEvt ) + { + std::vector noSpace; + size_t nuStart = currEvt.find_first_not_of( " " ); + size_t nuEnd = currEvt.find(" ", nuStart+1 ); + auto splitWords = std::make_shared>(); + splitWords->reserve(13); + while( nuStart != npos ) + { + std::string_view word = currEvt.substr( nuStart, nuEnd - nuStart ); + if( word == "" || word == "\n" || word == " " ){ + nuStart = currEvt.find_first_not_of(" ", nuEnd); + nuEnd = currEvt.find( " ", nuStart + 1); + continue; } + splitWords->push_back( currEvt.substr( nuStart, nuEnd - nuStart ) ); + nuStart = currEvt.find_first_not_of(" ", nuEnd); + nuEnd = currEvt.find( " ", nuStart + 1); + } + return splitWords; + } + + // ZW: fcn for splitting a string into a vector of strings, + // elements separated by any form of blankspace in the original string + // Ignores sequential blankspaces of all forms + std::shared_ptr> nuBlankSplitter( std::string_view currEvt ) + { + auto lines = nuLineSplitter( currEvt ); + auto splitString = std::make_shared>(); + splitString->reserve( lines->size() * lines->at(0).size() ); + for( auto line : *lines ) + { + auto words = nuWordSplitter(line); + for( auto word : *words ) + { + if( word == "" || word == "\n" || word == " " ){continue;} + splitString->push_back( word ); + } + } + return splitString; + } + + // ZW: templated fcn for comparing two + // string-like objects, ignoring cases + bool clStringComp( std::string_view org, std::string comp ){ + return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), + []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + } + bool clStringComp( std::string_view org, std::string_view comp ){ + return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), + []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + } + bool clStringComp( std::string org, std::string_view comp ){ + return std::equal( 
org.begin(), org.end(), comp.begin(), comp.end(), + []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + } + bool clStringComp( std::string org, std::string comp ){ + return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), + []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + } + // template + // bool clStringComp( const Str1& org, const Str2& comp ){ + // return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), + // []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + // } + // template + // bool clStringComp( const Str1Pt& orgStrt, const Str1Pt& orgEnd, const Str2& comp ){ + // return std::equal( orgStrt, orgEnd, comp.begin(), comp.end(), + // []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); + // } + + // ZW: templated fcn for finding a caseless substring searchTerm in srcFile + // On failure to find searchTerm, returns REX::npos + template + size_t clStringFind( const Str1& srcFile, const Str2& searchTerm, size_t strtPt = 0 ){ + size_t strLen = searchTerm.size(); + if( srcFile.size() == 0 || srcFile.size() < strLen ){ return npos; } + for( size_t k = strtPt ; k < srcFile.size() - strLen; ++k ) + { + if( clStringComp( srcFile.substr(k, strLen), searchTerm ) ){ return k; } + } + return npos; + } + + // ZW: templated fcn for finding a caseless substring searchTerm of srcFile + // fulfilling a particular predicate cond( size_t, string ) + template + size_t clStringFindIf( const Str1& srcFile, const Str2& searchTerm, std::function& cond, size_t strtPt = 0 ) + { + auto currPt = clStringFind( srcFile, searchTerm, strtPt ); + bool condStat = cond( currPt, srcFile ); + while( !( condStat ) && currPt != npos) + { + currPt = clStringFind( srcFile, searchTerm, currPt + 1 ); + condStat = cond( currPt, srcFile ); + } + return currPt; + } + + // ZW: templated fcn for counting the number of occurances of + // caseless substring searchTerm 
in string-like object srcFile + template + int clStrCount( Str1 srcFile, Str2 searchTerm ) + { + int count = 0; + size_t pos = 0; + while((pos = clStringFind( srcFile, searchTerm, pos ) ) != npos ){ + ++count; + ++pos; + } + return count; + } + + // ZW: templated fcn for finding each instance of + // of substring searchTerm of string-like object srcFile + template + std::shared_ptr> clFindEach( Str1 srcFile, Str2 searchTerm ) + { + auto eachPos = std::make_shared>(); + auto nos = clStrCount(srcFile, searchTerm); + if( nos == 0 ){ return eachPos; } + eachPos->reserve( nos ); + eachPos->push_back( clStringFind( srcFile, searchTerm ) ); + size_t currPos = clStringFind( srcFile, searchTerm, eachPos->at(0) + 1); + while( currPos != npos ) + { + eachPos->push_back( currPos ); + currPos = clStringFind( srcFile, searchTerm, currPos + 1 ); + } + return eachPos; + } + + // ZW: fcn for finding left angle bracket + // indicating the start of a new node in an XML file + size_t nodeStartFind( std::string_view parseFile, size_t strtPos ) + { + auto retPtr = parseFile.find("<", strtPos); + while( parseFile[retPtr + 1] == '!' || parseFile[retPtr +1] == '/' || parseFile[retPtr +1] == '?' 
){ + retPtr = parseFile.find("<", retPtr +1); + } + return retPtr; + } + + size_t endNodeStartFind( std::string_view parseFile, size_t strtPos ) + { + return parseFile.find(">", nodeStartFind( parseFile, strtPos )); + } + + std::pair startNodePts( std::string_view parseFile, size_t strtPos ) + { + return { nodeStartFind( parseFile, strtPos ), endNodeStartFind( parseFile, strtPos ) }; + } + + // ZW: fcn for finding left angle bracket + // indicating an end of a node in an XML file + size_t nodeEndFind( std::string_view parseFile, size_t strtPos ) + { + auto retPtr = parseFile.find("<", strtPos); + while( parseFile[retPtr + 1] != '/' ){ + retPtr = parseFile.find("<", retPtr +1); + } + return retPtr; + } + + size_t endNodeEndFind( std::string_view parseFile, size_t strtPos ) + { + return parseFile.find(">", nodeEndFind( parseFile, strtPos )); + } + + std::pair endNodePts( std::string_view parseFile, size_t strtPos ) + { + return { nodeEndFind( parseFile, strtPos ), endNodeEndFind( parseFile, strtPos ) }; + } + + // ZW: struct for handling tags in XML node opening tags + void xmlTag::setVal( std::string_view valSet ){ modded = true; val = valSet; } + void xmlTag::setId( std::string_view idSet ){ modded = true; id = idSet; } + std::string_view xmlTag::getVal(){ return val; } + std::string_view xmlTag::getId(){ return id; } + bool xmlTag::isModded(){ return modded; } + xmlTag::xmlTag(){ modded = false; return; } + xmlTag::xmlTag( xmlTag& oldTag ){ + modded = false; val = oldTag.getVal(); id = oldTag.getId(); + } + xmlTag::xmlTag( std::string_view initId, std::string_view initVal){ + modded = false; val = initVal; id = initId; + } + + // ZW: function for parsing XML opening + // tags and returning the next header tag + std::shared_ptr xmlTagParser( std::string_view tagLine, size_t& equPt ) + { + auto tagBreaker = tagLine.find_first_not_of(" ", equPt+1); // ZW: need to determine what type of quotation marks are used + auto tagEnder = tagLine.find( tagLine[tagBreaker], 
tagBreaker+1); + auto attrEnd = tagLine.find_last_not_of(" ", equPt - 1) ; + auto attrStart = tagLine.find_last_of(" ", attrEnd) + 1; + auto tagPtr = std::make_shared(tagLine.substr(attrStart, attrEnd - attrStart + 1), tagLine.substr(tagBreaker + 1, tagEnder - tagBreaker - 1)); + equPt = tagLine.find("=", equPt + 1); // ZW: modifies input equPt to point to the next equality sign in tagLine + return tagPtr; + } + + // ZW: struct for handling the tree structure of XML files, + // essentially just giving the positions of the beginning and + // end of each node s.t. the proper node structures can accurately + // detail where children begin and end while allowing for personal + // content between child nodes + xmlTree::xmlTree(){ return; } + xmlTree::xmlTree( std::string_view file ){ + origin = file; + children = std::make_shared>>(); + start = file.find_first_not_of(" \n\r\f\t\v"); + if( file.compare(start, 1, "<") != 0 ) { + faux = true; + contSt = start; + end = std::min( nodeStartFind(file, start), nodeEndFind(file, start) ); + contEnd = end; + initialised = true; + return; + } + if( file.compare(start + 1, 1, "!") == 0 || file.compare(start + 1, 1, "?") == 0 ) { + faux = true; + contSt = start; + contEnd = file.find(">", start + 1); + end = std::min( nodeStartFind(file, contEnd), nodeEndFind(file, contEnd) ); + initialised = true; + return; + } + auto stEnd = file.find(">", start); + if( file.compare(stEnd - 1, 1, "/" ) == 0 ) { + end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); + contSt = npos; + contEnd = npos; + initialised = true; + return; + } + contSt = stEnd + 1; + auto stPos = nodeStartFind(file, start + 1); + stEnd = nodeEndFind(file, start + 1); + contEnd = std::min(stPos, stEnd); + while( stPos < stEnd ) + { + children->push_back( std::make_shared( file, stPos, stEnd ) ); + } + stEnd = endNodeEndFind(file, stEnd); + end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); + initialised = true; + } + xmlTree::xmlTree( std::string_view file, size_t& 
strt, size_t& nd ){ + origin = file; + children = std::make_shared>>(); + start = file.find_first_not_of(" \n\r\f\t\v", strt); + if( file.compare(start, 1, "<") != 0) { + faux = true; + contSt = start; + strt = nodeStartFind(file, start); + nd = nodeEndFind(file, start); + end = std::min( strt, nd ); + contEnd = end; + initialised = true; + return; + } + if( file.compare(start + 1, 1, "!") == 0 ) { + faux = true; + contSt = start; + contEnd = file.find(">", start + 1); + strt = nodeStartFind(file, contEnd); + nd = nodeEndFind(file, contEnd); + end = std::min( strt, nd ); + initialised = true; + return; + } + auto stEnd = file.find(">", start); + if( file.compare(stEnd - 1, 1, "/" ) == 0 ) { + end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); + contSt = npos; + contEnd = npos; + strt = nodeStartFind(file, start); + nd = nodeEndFind(file, start); + initialised = true; + return; + } + contSt = stEnd + 1; + strt = nodeStartFind(file, start + 1); + nd = nodeEndFind(file, start + 1); + contEnd = std::min(strt, nd); + while( strt < nd ) + { + children->push_back( std::make_shared( file, strt, nd ) ); + } + end = file.find_first_not_of(" \n\r\f\t\v", endNodeEndFind(file, nd) + 1); + initialised = true; + strt = end; + nd = nodeEndFind(file, strt); + } + + // ZW: struct for handling nodes in generic XML files + xmlNode::xmlNode(){ modded = false; return; } + xmlNode::xmlNode( const std::string_view originFile, const size_t& begin, const std::vector>& childs ){ + modded = false; + xmlFile = originFile.substr( begin ); + structure = xmlTree( originFile ); + faux = structure.isFaux(); + start = structure.getStart(); + end = structure.getEnd(); + size_t trueStart = xmlFile.find_first_not_of("< \n\r\f\t\v", start+1); + name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ \n\r\f\t\v", trueStart) - trueStart ); + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + for( auto child : childs ){ + children.push_back( 
child ); + } + } + xmlNode::xmlNode( xmlTree &tree ){ + modded = false; + structure = tree; + if( !structure.isInit() ){ return; } + xmlFile = structure.getOrigin(); + faux = structure.isFaux(); + start = structure.getStart(); + end = structure.getEnd(); + size_t trueStart = xmlFile.find_first_not_of("< \n\r\f\t\v", start); + name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ \n\r\f\t\v", trueStart) - trueStart ); + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + for( auto& child : *(structure.getChildren()) ){ + children.push_back( std::make_shared( *child ) ); + } + } + std::vector> xmlNode::getChildren(){ return children; } + std::vector> xmlNode::getTags(){ return tags; } + std::string_view xmlNode::getFile(){ return xmlFile; } + std::string_view xmlNode::getName(){ return name; } + std::string_view xmlNode::getContent(){ return content; } + size_t xmlNode::getStart(){ return start; } + size_t xmlNode::getEnd(){ return end; } + xmlTree xmlNode::getTree(){ return structure; } + bool xmlNode::isModded(){ return modded; } + bool xmlNode::isModded( bool deep ){ + bool modStat = isModded(); + if( !deep ){ return modStat; } + for( auto child : children ){ modStat = (modStat || child->isModded( deep )); } + return modStat; + } + bool xmlNode::isWritten(){ return written; } + bool xmlNode::isParsed(){ return parsed; } + bool xmlNode::isFaux(){ return faux; } + bool xmlNode::hasChildren(){ return children.size() > 0; } + void xmlNode::setModded( bool mod ){ modded = mod; } + bool xmlNode::deepModded(){ return deepMod; } + bool xmlNode::deepParse(){ return deepParsed; } + void xmlNode::parser( bool recursive ){ + parsed = parse( recursive ); + } + void xmlNode::addChild( std::shared_ptr child ){ modded = true; children.push_back(child); } + void xmlNode::addTag( std::shared_ptr tag ){ modded = true; tags.push_back(tag); } + void xmlNode::setFile( std::string_view file ){ modded = true; xmlFile = file; } 
+ void xmlNode::setName( std::string_view newName ){ modded = true; name = newName; } + void xmlNode::setCont( std::string_view cont ){ modded = true; content = cont; } + + bool xmlNode::parse(){ + auto topStat = parseTop(); + auto contStat = parseContent(); + return ( topStat && contStat ); + } + bool xmlNode::parse( bool recurs ) + { + bool parseSt = parse(); + if( !recurs ){ return parseSt; } + bool childSt = parseChildren( recurs ); + deepMod = true; + return (parseSt && childSt ); + } + bool xmlNode::parseTop(){ + if( xmlFile == "" ){ return false; } + if( isFaux() ){ return true; } + size_t eqSgn = xmlFile.find( "=", start ); size_t nodeInitEnd = xmlFile.find( ">", start ); + while( eqSgn < nodeInitEnd ){ tags.push_back( xmlTagParser( xmlFile, eqSgn ) ); } + return true; + } + bool xmlNode::parseContent(){ + if( xmlFile == "" ){ return false; } + end = structure.getContEnd(); + for( auto branch : *(structure.getChildren()) ){ + children.push_back( std::make_shared( *branch ) ); + } + return true; + } + bool xmlNode::parseChildren( bool recursive ){ + bool status = true; + if( recursive ){ + for( auto child : children ) + { + status = (status && child->parse( true )); + deepParsed = true; + } + } else { + for( auto child : children ) + { + status = (status && child->parse()); + deepParsed = true; + } + } + return status; + } + void xmlNode::headWriter() { + if( isFaux() ){ return; } + nodeHeader = "<" + std::string(name) ; + for( auto tag : tags ){ + nodeHeader += " " + std::string(tag->getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + nodeHeader += ">"; + } + void xmlNode::endWriter() { + if( isFaux() ){ return; } + auto endSt = xmlFile.find_last_of("<", end); + nodeEnd = xmlFile.substr( endSt, end - endSt ); + } + void xmlNode::contWriter() { + if( hasChildren() ){ + nodeContent = std::string(content.substr(0, children[0]->start - 1 )); + } else { + nodeContent = std::string(content); + } + } + void xmlNode::childWriter() { + for(auto child : 
children){ + nodeContent += (*child->nodeWriter()); + } + } + void xmlNode::endFinder(){ + auto headEnd = xmlFile.find(">", start); + auto slashPos = xmlFile.find("/", start); + if( headEnd > slashPos ){ end = headEnd; } + else{ end = xmlFile.find( ">", xmlFile.find( "( nodeHeader + nodeContent + nodeEnd ); + written = true; + modded = false; + } else if( !isWritten() ){ + writtenSelf = std::make_shared( xmlFile.substr( start, end - start ) ); + written = true; + } + } + + void xmlNode::childCounter( int& noChilds ) + { + for( auto child : children ) + { + child->childCounter( noChilds ); + if( child->end == 0 || child->isFaux() ){ --noChilds; } + } + noChilds += children.size(); + } + int xmlNode::childCounter() { + int noChilds = 0; + childCounter( noChilds ); + return noChilds; + } + std::shared_ptr xmlNode::nodeWriter() { + if( isModded( true ) || !isWritten() ){ fullWriter(); } + return writtenSelf; + } + + + // ZW: function for large scale parsing of XML files + // sequentially goes through the document and + // recursively calls itself while the next node + // beginning is closer than the next node ending + std::shared_ptr xmlPtrParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) + { + auto currNode = std::make_shared(parseFile, initPos); + size_t equalSign = parseFile.find("=", initPos); + size_t nodeInitEnd = parseFile.find(">", initPos); + initPos = nodeStartFind( parseFile, initPos + 1 ); + while( equalSign < nodeInitEnd ){ + currNode->addTag( xmlTagParser(parseFile, equalSign) ); + } + while( initPos < endPos ) + { + currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); + } + + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, endPos + 1 ); + return currNode; + } + + // ZW: struct for handling rwgt parameter sets + // in the LHE header initrwgt node + int headWeight::headWeight::getId(){ return id; } + std::string_view headWeight::getTag(){ return idTag; } + bool headWeight::hasTag(){ return 
(idTag.size() > 0); } + headWeight::headWeight(){ name = "weight"; return; } + headWeight::headWeight( std::string_view paramSet, const size_t& begin ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; return; } + headWeight::headWeight( std::string_view paramSet, std::string_view idText, int idNo, const size_t& begin ) : xmlNode(){ + name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; + } + headWeight::headWeight( xmlNode& node ) : xmlNode( node ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } + headWeight::headWeight( xmlNode* node ) : xmlNode( *node ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } + headWeight::headWeight( std::shared_ptr node ) : xmlNode( *node ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } + headWeight::headWeight( xmlTree& tree ) : xmlNode( tree ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } + headWeight::headWeight( xmlTree* tree ) : xmlNode( *tree ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( 
tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } + headWeight::headWeight( std::shared_ptr tree ) : xmlNode( *tree ){ + parser( false ); + name = "weight"; + for (auto tag : tags ){ + if( tag->getId() == "id" ){ + idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); + id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); + } + } + } + headWeight::headWeight( std::string_view paramSet, std::string& idText, unsigned int idNo, const size_t& begin ) : xmlNode(){ + name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; + } + headWeight::headWeight( std::string_view paramSet, std::string& idText){ + name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; + } + void headWeight::setId( std::string identity ){ modded = true; idTag = identity; } + void headWeight::headWriter(){ + if( tags.size() == 0 ){ + if( idTag == "" ){ nodeHeader = ""; return; } + if( id == npos ){ nodeHeader = ""; return; } + nodeHeader = ""; + return; + } + nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + nodeHeader += ">"; + } + void headWeight::headWriter( bool incId ){ + if( !incId ){ headWriter(); return; } + if( idTag == "" ){ headWriter(); return; } + if( id == npos ){ nodeHeader = "getId() == "id" ){ continue; } + nodeHeader += " " + std::string(tag->getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + nodeHeader += ">"; + } + void headWeight::endWriter() { + nodeEnd = "\n"; + } + void headWeight::contWriter() { + nodeContent = std::string( content ); + } + void headWeight::childWriter() { + for( auto child : children){ + if( child->getName() == "weight" ){ continue; } + nodeContent += *(child->nodeWriter()); + } + } + void headWeight::childWriter( bool hasChildren ){ + if( hasChildren ){ childWriter(); } + } + void headWeight::fullWriter(){ + if( isModded() || !isWritten() ){ + headWriter(); + contWriter(); + childWriter(); + endWriter(); + writtenSelf = 
std::make_shared( nodeHeader + nodeContent + nodeEnd ); + written = true; + modded = false; + } + } + void headWeight::fullWriter( bool incId, bool hasChildren ){ + if( isModded() || !isWritten() ){ + headWriter( incId ); + contWriter(); + childWriter( hasChildren ); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + modded = false; + written = true; + } + } + + // ZW: struct for handling rwgt groups + // in the LHE header initrwgt node + bool weightGroup::getIncId(){ return includeId; } + void weightGroup::setIncId( bool nuIncId ){ includeId = nuIncId; } + std::vector> weightGroup::getWgts(){ return paramSets; } + void weightGroup::addWgt( headWeight nuWgt ){ modded = true; paramSets.push_back( std::make_shared( nuWgt ) ); if( nuWgt.hasTag() ){ includeId = true; } } + void weightGroup::addWgt( std::shared_ptr nuWgt ){ modded = true; paramSets.push_back( nuWgt); if( nuWgt->hasTag() ){ includeId = true; }} + weightGroup::weightGroup() : xmlNode(){ name = "weightgroup"; return; } + weightGroup::weightGroup( std::vector> nuWgts ) : xmlNode(){ name = "weightgroup"; paramSets = nuWgts; for( auto wgt : nuWgts ){ if( wgt->hasTag() ){ includeId = true; } } } + weightGroup::weightGroup( std::vector nuWgts ) : xmlNode(){ + name = "weightgroup"; + for( auto wgt : nuWgts ){ + paramSets.push_back( std::make_shared( wgt ) ); + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup::weightGroup( xmlNode& wgtNode ) : xmlNode( wgtNode ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup::weightGroup( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( 
child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup::weightGroup( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup::weightGroup( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup::weightGroup( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ + parser( true ); + name = "weightgroup"; + paramSets.reserve( children.size() ); + for( auto child : children ){ + if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } + } + for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } + } + weightGroup::weightGroup( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) + : xmlNode( originFile, begin, childs ){ + name = "weightgroup"; + if( parseTop() ){ + int checker = 0; + for( auto tag : tags ){ + if( tag->getId() == "name" ){ ++checker; rwgtName = tag->getVal(); } + if( tag->getId() == "weight_name_strategy" ){ ++checker; wgtNamStrat = tag->getVal(); + if(wgtNamStrat == "includeIdInWeightName"){ includeId = true; } } + if( checker == 2 ){ break; } + } + } + } + void weightGroup::headWriter() { + nodeHeader = "nodeWriter()); + } + } + void weightGroup::childWriter() { + for(auto child : children){ + if( child->getName() == "weight" ){ continue; } + nodeContent += (*child->nodeWriter()); + } + } + 
void weightGroup::childWriter( bool hasChildren ){ + if( hasChildren ){ childWriter(); } + return; + } + void weightGroup::endWriter() { nodeEnd = "
\n"; } + + std::vector> initRwgt::getGroups(){ return groups; } + size_t initRwgt::noGrps(){ return groups.size(); } + void initRwgt::addGroup( weightGroup nuGroup ){ + modded = true; + auto nuGrpPtr = std::make_shared( nuGroup ); + if( grpInit( nuGrpPtr ) ){ groups.push_back( std::make_shared( nuGroup ) ); } + } + void initRwgt::addGroup( std::shared_ptr nuGroup ){ + modded = true; + if( grpInit( nuGroup ) ){ groups.push_back( nuGroup ); } + } + void initRwgt::addWgt( unsigned int index, std::shared_ptr nuWgt ){ + if( index < groups.size() ){ modded = true; groups[index]->addWgt( nuWgt ); } + else throw std::range_error( "Appending weight to uninitialised weightgroup." ); + } + void initRwgt::addWgt( unsigned int index, headWeight nuWgt ){ + if( index < groups.size() ){ modded = true; groups[index]->addWgt( nuWgt ); } + else throw std::range_error( "Appending weight to uninitialised weightgroup." ); + } + initRwgt::initRwgt() : xmlNode(){ name = "initrwgt"; return; } + initRwgt::initRwgt( std::vector> nuGroups ) : xmlNode(){ + name = "initrwgt"; + for( auto group : nuGroups ){ + groups.push_back( std::make_shared( *group ) ); + } + } + initRwgt::initRwgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ + parser( true ); + name = "initrwgt"; + groups.reserve( children.size() ); + for( auto child : children ){ + groups.push_back( std::make_shared( *child ) ); + } + } + initRwgt::initRwgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + name = "initrwgt"; + groups.reserve( children.size() ); + for( auto child : children ){ + groups.push_back( std::make_shared( *child ) ); + } + } + initRwgt::initRwgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + name = "initrwgt"; + groups.reserve( children.size() ); + for( auto child : children ){ + groups.push_back( std::make_shared( *child ) ); + } + } + initRwgt::initRwgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + parser( true ); + name = "initrwgt"; + groups.reserve( children.size() ); + for( auto 
child : children ){ + groups.push_back( std::make_shared( *child ) ); + } + } + bool initRwgt::grpInit( std::shared_ptr& wgt ){ + if( grpIsInit ){ return true; } + else{ + groups = std::vector>( 1, wgt ); + grpIsInit = true; + return false; + } + } + void initRwgt::contWriter(){ + nodeContent = "\n"; + for( auto group : groups ){ + nodeContent += (*group->nodeWriter()); + } + } + void initRwgt::childWriter(){ + for( auto child : children ){ + if( child->getName() == "weightgroup" ){ continue; } + nodeContent += (*child->nodeWriter()); + } + } + void initRwgt::childWriter( bool hasChildren ){ + if( hasChildren ){ childWriter(); } + return; + } + + // ZW: struct for handling weights + // in event blocks of LHE files + void bodyWgt::setComment( std::string_view nuComment ){ modded = true; comment = nuComment; } + void bodyWgt::setVal( std::string nuVal ){ modded = true; valS = nuVal; valD = std::stod(valS);} + void bodyWgt::setVal( std::string_view nuVal ){ modded = true; valS = std::string(nuVal); valD = std::stod(valS);} + void bodyWgt::setVal( double nuVal ){ modded = true; valD = nuVal; valS = std::to_string(valD);} + void bodyWgt::setId( std::string nuId ){ + modded = true; id = nuId; + for( auto tag : tags ){ + if( tag->getId() == "id" ){ tag->setVal( id ); return; } + } + addTag( std::make_shared( "id", id ) ); + } + void bodyWgt::setModded( bool nuModded ){ modded = nuModded; } + std::string_view bodyWgt::getComment(){ return comment; } + std::string_view bodyWgt::getValS(){ return valS; } + double bodyWgt::getValD(){ return valD; } + bodyWgt::bodyWgt() : xmlNode(){ return; } + bodyWgt::bodyWgt( std::string_view value ) : xmlNode() { setVal( value ); modded = false; } + bodyWgt::bodyWgt( double value ) : xmlNode() { setVal( value ); modded = false; } + bodyWgt::bodyWgt( std::string_view value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } + bodyWgt::bodyWgt( double value, xmlTag rwgtId ) : xmlNode() { 
setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } + bodyWgt::bodyWgt( std::string_view value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } + bodyWgt::bodyWgt( double value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } + bodyWgt::bodyWgt( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) + : xmlNode( originFile, begin, childs ){ + auto strtPt = originFile.find_first_not_of(" >+", originFile.find(">", begin)+1); + valS = originFile.substr( strtPt, originFile.find(" ", strtPt) - strtPt ); + valD = std::stod( valS ); + } + bodyWgt::bodyWgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt::bodyWgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt::bodyWgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt::bodyWgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt::bodyWgt( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + bodyWgt::bodyWgt( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ + parser( true ); + valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + valD = std::stod( valS ); + } + 
bodyWgt::bodyWgt( double value, std::string& idTag ){ + setVal( value ); + id = idTag; + addTag( std::make_shared("id",id) ); + } + void bodyWgt::appendWgt( std::shared_ptr document ){ + if( !isWritten() ){ fullWriter(); } + *document += *writtenSelf; + } + void bodyWgt::appendWgt( std::string* document ){ + if( !isWritten() ){ fullWriter(); } + *document += *writtenSelf; + } + std::shared_ptr bodyWgt::appendWgt( std::string_view document ){ + if(!isWritten() ){ fullWriter(); } + auto retDoc = std::make_shared( document ); + *retDoc += *writtenSelf; + return retDoc; + } + void bodyWgt::fullWriter() { + writtenSelf = std::make_shared( "getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + *writtenSelf += ">" + std::string(valS) + "\n"; + modded = false; + written = true; + } + + // ZW: fcn for finding the next block in SLHA format + // parameter cards + size_t blockFinder( std::string_view parseFile, size_t startPt = 0 ){ + if( parseFile.size() > 5 ){ if( clStringComp(parseFile.substr(0,5), std::string("block") )){ return size_t(0); } } + return clStringFind( parseFile, std::string("\nblock"), startPt ); + } + + // ZW: fcn for finding each decay line in SLHA format + // parameter card + std::vector decBlockStractor( std::string_view parseFile ){ + auto allDs = nuFindEach( parseFile, "\nd" ); + std::vector decLines; + decLines.reserve( allDs->size() ); + for( auto pos : *allDs ) + { + if( !(clStringComp(parseFile.substr( pos+1, 5 ), std::string("decay"))) ){ continue; } + decLines.push_back( parseFile.substr( pos + 1, parseFile.find( "\n", pos + 1 ) - pos - 1 ) ); + } + return decLines; + } + + // ZW: fcn for extracting the relevant lines of + // a block in SLHA format parameter card + // removes any comments between start of this block and next + // and also ignores lines with other information, + // eg DECAY lines + std::vector blockLineStractor( std::string_view parseFile, size_t startPt = 0){ + auto blockStrt = blockFinder( parseFile, startPt ); + auto 
newBlock = blockFinder( parseFile, blockStrt + 1 ); + std::vector paramLines; + paramLines.reserve( nuStrCount( parseFile, "\n" ) ); + std::shared_ptr> parLines; + if( newBlock == npos ){ parLines = nuLineSplitter( parseFile.substr( blockStrt ) ); } + else{ parLines = nuLineSplitter( parseFile.substr( blockStrt, newBlock - blockStrt ) ); } + for( auto line : *parLines ) + { + if( line.size() == 0 ){ continue; } + if( line[0] != ' ' ){ continue; } + paramLines.push_back( line ); + } + return paramLines; + } + + // ZW: struct for handling the first line of + // LHE format event block + std::string_view evHead::getComment(){ return comment; } + std::string_view evHead::getWeight(){ return weight; } + std::string_view evHead::getScale(){ return scale; } + std::string_view evHead::getAQED(){ return aqed; } + std::string_view evHead::getAQCD(){ return aqcd; } + std::string_view evHead::getNprt(){ return nprt; } + std::string_view evHead::getProcID(){ return procid; } + bool evHead::isModded(){ return modded; } + bool evHead::isWritten(){ return written; } + void evHead::setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } + void evHead::setWeight( std::string_view nuWgt ){ modded = true; weight = nuWgt; } + void evHead::setScale( std::string_view nuScale ){ modded = true; scale = nuScale; } + void evHead::setAQED( std::string_view nuAQED ){ modded = true; aqed = nuAQED; } + void evHead::setAQCD( std::string_view nuAQCD ){ modded = true; aqcd = nuAQCD; } + void evHead::setNprt( std::string_view nuNprt ){ modded = true; nprt = nuNprt; } + void evHead::setNprt( int nuNprt ){ modded = true; nprtint = nuNprt; nprtstr = std::to_string(nuNprt); nprt = nprtstr;} + void evHead::setProcID( std::string_view nuProcID ){ modded = true; procid = nuProcID; } + std::shared_ptr evHead::getContent(){ + if( !isWritten() || isModded() ){ writer(); } + return content; + } + evHead::evHead(){ return; } + evHead::evHead( const std::string_view originFile, size_t beginLine, 
size_t endLine ) + { + if( originFile.size() == 0){ return; } + beginLine = originFile.find_first_not_of("\n \r\f\t\v", beginLine); + if( endLine == npos ){ endLine = originFile.find("\n", beginLine ) + 1; } + sourceFile = originFile.substr( beginLine, endLine - beginLine ); + auto evLine = nuWordSplitter( sourceFile ); + nprt = evLine->at(0) ; + procid = evLine->at(1); + weight = evLine->at(2); + scale = evLine->at(3); + aqed = evLine->at(4); + aqcd = evLine->at(5); + } + void evHead::writer(){ + if( isWritten() && !isModded() ){ return; } + if( !isModded() ){ content = std::make_shared( sourceFile ); return; } + auto retText = std::make_shared( " " ); + *content = " " + std::string( nprt ); + for( size_t k = 0 ; k < 8 - procid.length() ; ++k ){ *content += " "; } + *content += std::string( procid ) + " " + std::string( weight ) + " " + std::string( scale ) + " " + std::string( aqed ) + " " + std::string( aqcd ); + if( comment != "" ){ *content += " # " + std::string( comment ); } + *content += "\n"; + modded = false; + written = true; + } + + // ZW: struct for handling particle lines + // in LHE format event block + std::string_view lhePrt::getLine(){ return sourceFile; } + std::string_view lhePrt::getComment(){ return comment; } + std::vector lhePrt::getMom(){ return std::vector( std::begin( mom ), std::end( mom ) ); } + std::string_view lhePrt::getE(){ return energy; } + std::string_view lhePrt::getMass(){ return mass; } + std::string_view lhePrt::getVTim(){ return vtim; } + std::string_view lhePrt::getSpin(){ return spin; } + std::string_view lhePrt::getPDG(){ return pdg; } + std::string_view lhePrt::getStatus(){ return status; } + std::vector lhePrt::getMothers(){ return std::vector( std::begin( mothers ), std::end( mothers ) ); } + std::vector lhePrt::getColor(){ return std::vector( std::begin( icol ), std::end( icol ) ); } + void lhePrt::setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } + void lhePrt::setMom( std::vector nuMom ){ 
modded = true; mom[0] = nuMom[0]; mom[1] = nuMom[1]; mom[2] = nuMom[2]; } + void lhePrt::setEnergy( std::string_view nuE ){ modded = true; energy = nuE; } + void lhePrt::setMass( std::string_view nuM ){ modded = true; mass = nuM; } + void lhePrt::setVTim( std::string_view nuVTim ){ modded = true; vtim = nuVTim; } + void lhePrt::setSpin( std::string_view nuSpin ){ modded = true; spin = nuSpin; } + void lhePrt::setPDG( std::string_view nuPDG ){ modded = true; pdg = nuPDG; } + void lhePrt::setStatus( std::string_view nuSt ){ modded = true; status = nuSt; } + void lhePrt::setMothers( std::vector nuMum ){ modded = true; mothers[0] = nuMum[0]; mothers[1] = nuMum[1]; } + void lhePrt::setColors( std::vector nuCol ){ modded = true; icol[0] = nuCol[0]; icol[1] = nuCol[1]; } + bool lhePrt::isModded(){ return modded; } + bool lhePrt::isWritten(){ return written; } + std::shared_ptr lhePrt::getContent(){ + if( !isWritten() || isModded() ){ writer(); } + return content; + } + lhePrt::lhePrt(){ return; } + lhePrt::lhePrt( std::pair& prtInfo ){ + status = std::to_string( prtInfo.first ); + pdg = std::to_string( prtInfo.second ); + } + lhePrt::lhePrt( std::pair& prtInfo ){ + status = std::string_view( prtInfo.first ); + pdg = std::string_view( prtInfo.second ); + } + lhePrt::lhePrt( const std::string_view originFile, const size_t& beginLine, const size_t& endLine ) + { + sourceFile = originFile.substr( beginLine, endLine - beginLine ); + auto evLine = nuWordSplitter( sourceFile ); + pdg = evLine->at(0); + status = evLine->at(1); + mothers[0] = evLine->at(2); mothers[1] = evLine->at(3); + icol[0] = evLine->at(4); icol[1] = evLine->at(5); + for( int k = 6 ; k < 9 ; ++k){ + mom[k-6] = evLine->at(k); + } + energy = evLine->at(9); + mass = evLine->at(10); + vtim = evLine->at(11); + spin = evLine->at(12); + if( evLine->size() > 13 ){ comment = sourceFile.substr( sourceFile.find( "#" ) ); } + } + void lhePrt::writer(){ + if( isWritten() && !isModded() ){ return; } + if( !isModded() ){ 
content = std::make_shared( sourceFile ); return; } + *content = ""; + for( size_t k = 0; k < 10 - pdg.length() ; ++k ){ *content += " "; } + *content += std::string(pdg) + " " + std::string(status); + for( auto mum : mothers ){ *content += " " + std::string( mum ); } + for( auto col : icol ){ *content += " " + std::string( col ); } + for( auto pval : mom ){ *content += " " + std::string(pval); } + *content += " " + std::string( energy ) + " " + std::string( mass ) + " " + std::string( vtim ) + " " + std::string( spin ); + if( comment != "" ){ *content += " # " + std::string( comment ); } + *content += "\n"; + modded = false; + written = true; + } + + // ZW: struct for handling LHE format event block + evHead event::getHead(){ return header; } + std::vector> event::getPrts(){ return prts; } + std::vector> event::getWgts(){ return rwgt; } + void event::setHead( evHead head ){ modded = true; header = head; } + void event::addPrt( std::shared_ptr prtcl ){ modded = true; prts.push_back( prtcl ); } + void event::addPrt( lhePrt prtcl ){ modded = true; prts.push_back( std::make_shared(prtcl) ); } + void event::setPrts( std::vector> prtcls ){ modded = true; prts = prtcls; } + void event::addWgt( bodyWgt nuWgt ){ addedWgt = true; rwgt.push_back( std::make_shared(nuWgt) ); } + void event::addWgt( std::shared_ptr nuWgt ){ modded = true; rwgt.push_back( nuWgt ); } + void event::addWgt( bodyWgt nuWgt, std::string& id ){ addedWgt = true; nuWgt.setId( id ); rwgt.push_back( std::make_shared(nuWgt) ); } + void event::addWgt( std::shared_ptr nuWgt, std::string& id ){ modded = true; nuWgt->setId( id ); rwgt.push_back( nuWgt ); } + bool event::newWeight(){ return addedWgt; } + int event::getNprt(){ return prts.size(); } + bool event::isModded() { return modded; } + bool event::isModded( bool deep ) { + if( !deep ){ return modded; } + bool modStat = modded; + for( auto child : children ){ if(modStat){ return modStat; }; modStat = (modStat || child->isModded( deep )); } + modStat = 
(modStat || header.isModded()); + for( auto prt : prts ){ if(modStat){ return modStat; }; modStat = (modStat || prt->isModded()); } + for( auto wgt : rwgt ){ if(modStat){ return modStat; }; modStat = (modStat || wgt->isModded()); } + return modStat; + } + event::event(){ return; } + event::event( std::vector>& prtInfo ){ + header.setNprt( std::to_string( prtInfo.size() ) ); + for( auto& prt : prtInfo ){ + prts.push_back( std::make_shared( prt ) ); + } + } + event::event( std::vector>& prtInfo ){ + header.setNprt( prtInfo.size() ); + for( auto& prt : prtInfo ){ + prts.push_back( std::make_shared( prt ) ); + } + } + event::event( std::vector> prtInfo ){ + header.setNprt( std::to_string( prtInfo.size() ) ); + prts = prtInfo; + } + event::event( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) + : xmlNode(originFile, begin, childs) { + xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" \n\r\f\t\v", begin+1); + if( trueStart == npos ){ return; } + auto vals = lineFinder( originFile.substr( trueStart, originFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(originFile, vals->at(0) + trueStart, vals->at(1) + trueStart + 1 ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(originFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart + 1) ); + } + } + event::event( const xmlNode& originFile ) + : xmlNode( originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", start+1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + 
trueStart) ); + } + } + event::event( const xmlNode* originFile ) + : xmlNode( *originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event::event( const std::shared_ptr& originFile ) + : xmlNode( *originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event::event( xmlTree& originFile ) + : xmlNode( originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event::event( xmlTree* originFile ) + : xmlNode( *originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 
)); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event::event( std::shared_ptr originFile ) + : xmlNode( *originFile ) { + size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); + auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); + header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); + prts.reserve(vals->size()); + for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) + { + prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); + } + } + event::event( const event& original ){ + this->rwgt = original.rwgt; + this->header = original.header; + this->prts = original.prts; + this->procMap = original.procMap; + this->procOrder = original.procOrder; + } + event::event( event* original ) : event(*original){}; + event::event( std::shared_ptr original) : event(*original){}; + bool event::prtsAreMod(){ + for( auto prt : prts ){ if( prt->isModded() ){ return true; } } + return false; + } + bool event::headIsMod(){ + return header.isModded(); + } + bool event::isSpecSort() const { return specSorted; } + sortFcn event::getSortFcn() const { return eventSort; } + statSort event::getStatSort() const { return specSort; } + bool event::hasRwgt(){ + if( rwgt.size() > 0 ){ return true; } + return false; + } + bool event::rwgtChild(){ + if( childRwgt != nullptr ){ return true; } + for( auto child : children ){ if( clStringComp(child->getName(), std::string("rwgt") ) ){ childRwgt = child; return true; } } + return false; + } + bool event::bothRwgt(){ return (hasRwgt() && rwgtChild() ); } + bool event::eitherRwgt(){ return (hasRwgt() || rwgtChild() ); } + bool 
event::initProcMap(bool hard) + { + if(!hard){ if( procMap.size() > 0 ){ return true; } } + for( auto prt : prts ){ + procMap.insert({prt->getStatus(), std::vector()}); + procOrder.insert({prt->getStatus(), std::vector()}); + } + for( auto prt : prts ){ + procMap[prt->getStatus()].push_back( prt->getPDG() ); + } + for( auto stat = procMap.begin(); stat!= procMap.end(); ++stat ){ + procOrder[stat->first] = *stoiSort( stat->second ); + } + hasBeenProc = true; + return true; + } + bool event::initProcMap( sortFcn sorter, bool hard ) + { + if(!hard){ if( procMap.size() > 0 ){ return true; } } + specSorted = false; + eventSort = sorter; + for( auto prt : prts ){ + procMap.insert({prt->getStatus(), std::vector()}); + procOrder.insert({prt->getStatus(), std::vector()}); + } + for( auto prt : prts ){ + procMap[prt->getStatus()].push_back( prt->getPDG() ); + } + for( auto stat = procMap.begin(); stat!= procMap.end(); ++stat ){ + procOrder[stat->first] = *sorter( stat->second ); + } + hasBeenProc = true; + return true; + } + bool event::initProcMap( statSort sorter, bool hard ) + { + if(!hard){ if( procMap.size() > 0 ){ return true; } } + specSorted = true; + specSort = sorter; + for( auto prt : prts ){ + procMap.insert({prt->getStatus(), std::vector()}); + procOrder.insert({prt->getStatus(), std::vector()}); + } + for( auto prt : prts ){ + procMap[prt->getStatus()].push_back( prt->getPDG() ); + } + for( auto stat = procMap.begin(); stat!= procMap.end(); ++stat ){ + procOrder[stat->first] = *sorter(stat->first, stat->second ); + } + hasBeenProc = true; + return true; + } + bool event::inRwgtChild( std::string_view nameIn ){ + for( auto child : childRwgt->getChildren() ){ + for( auto tag : child->getTags() ){ if(clStringComp(tag->getVal(), nameIn)){ return true; } } + } + return false; + } + bool event::checkRwgtOverlap(){ + for( auto wgt : rwgt ){ + for( auto tag : wgt->getTags() ){ if( inRwgtChild( tag->getVal() ) ){ return true; } } + } + return false; + } + void 
event::childRwgtWriter(){ + if( rwgtChild() ){ nodeContent += *childRwgt->nodeWriter(); } + } + void event::vecRwgtWriter( bool midNode ){ + if( !midNode ){ nodeContent += "\n"; } + for( auto wgt : rwgt ){ + nodeContent += *wgt->nodeWriter(); + } + nodeContent += "\n"; + } + void event::rwgtWriter(){ + if( bothRwgt() ){ if( checkRwgtOverlap() ){ childRwgtWriter(); return; } + childRwgtWriter(); + nodeContent.erase( nodeContent.size() - 8, 8 ); + vecRwgtWriter(); + return; + } else { + if( hasRwgt() ){ vecRwgtWriter(); return; } + if( rwgtChild() ){ childRwgtWriter(); return; } + } + } + void event::contWriter() { + nodeContent = "\n" + *header.getContent(); + for( auto prt : prts ){ + nodeContent += *prt->getContent(); + } + } + void event::childWriter() { + for( auto child : children ){ + if( clStringComp( child->getName(), std::string("wgt") ) ){ continue; } + nodeContent += *child->nodeWriter(); + } + } + void event::fullWriter() { + if( isModded( false ) ){ + headWriter(); + contWriter(); + childWriter(); + rwgtWriter(); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + modded = false; + } else if( !isWritten() ){ + writtenSelf = std::make_shared( xmlFile.substr( start, end - start ) ); + written = true; + } + } + void event::fullWriter( bool deep ){ + if( !deep ){ fullWriter(); return; } + if( isModded( true ) ){ + headWriter(); + contWriter(); + childWriter(); + rwgtWriter(); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + modded = false; + written = true; + } else if( !isWritten() ){ + writtenSelf = std::make_shared( xmlFile.substr( start, end - start ) ); + written = true; + } + } + void event::appendWgts(){ + if( !addedWgt ){ return; } + writtenSelf->erase( writtenSelf->size() - 17, 17 ); + for( auto wgt : rwgt ){ + if( !wgt->isWritten() ){ wgt->appendWgt( writtenSelf ); } + } + *writtenSelf += "\n\n"; + } + std::shared_ptr event::nodeWriter() { + if( isModded(false) || 
!isWritten() ){ fullWriter(); return writtenSelf; } + if( addedWgt ){ appendWgts(); } + return writtenSelf; + } + std::shared_ptr event::nodeWriter( bool recursive ){ + if( isModded( recursive ) || !isWritten() ){ fullWriter(); return writtenSelf; } + if( addedWgt ){ appendWgts(); } + return writtenSelf; + } + std::map> &event::getProc(){ + if( initProcMap() ){ return procMap; } + else throw std::runtime_error("Error while parsing event node."); + } + std::map> &event::getProcOrder(){ + if( initProcMap() ){ return procOrder; } + else throw std::runtime_error("Error while parsing event node."); + } + std::map> event::getProc() const { + if ( hasBeenProc ){ return procMap; } + else throw std::runtime_error("Const declaration of event node before it has been procesed."); + } + std::map> event::getProcOrder() const { + if ( hasBeenProc ){ return procOrder; } + else throw std::runtime_error("Const declaration of event node before it has been procesed."); + } + std::map> &event::getProc(sortFcn sorter){ + if( initProcMap(sorter) ){ return procMap; } + else throw std::runtime_error("Error while parsing event node."); + } + std::map> &event::getProcOrder(sortFcn sorter){ + if( initProcMap(sorter) ){ return procOrder; } + else throw std::runtime_error("Error while parsing event node."); + } + std::map> &event::getProc(statSort sorter){ + if( initProcMap(sorter) ){ return procMap; } + else throw std::runtime_error("Error while parsing event node."); + } + std::map> &event::getProcOrder(statSort sorter){ + if( initProcMap(sorter) ){ return procOrder; } + else throw std::runtime_error("Error while parsing event node."); + } + + eventSet::eventSet(){ + events = std::vector(); + } + eventSet::eventSet( const eventSet& nuEvents ){ + this->events = nuEvents.events; + this->relStats = nuEvents.relStats; + this->comp = nuEvents.comp; + } + eventSet::eventSet( std::vector& nuEvents ){ + events = std::vector(); + for( auto ev : nuEvents ){ + events.push_back( ev ); + } + } + 
/* REVIEW: eventSet — container of LHE events with a pluggable membership test (comp); belongs() throws if no comparator was set. */ eventSet::eventSet( std::vector>& nuEvents ){ + events = std::vector(); + for( auto ev : nuEvents ){ + events.push_back( *ev ); + } + } + void eventSet::setRelStats( std::vector& nuStats ){ + relStats = nuStats; + } + void eventSet::addEvent( event& nuEvent ){ + events.push_back( nuEvent ); + } + void eventSet::addEvent( std::shared_ptr nuEvent ){ + events.push_back( *nuEvent ); + } + void eventSet::addEvent( std::vector& nuEvents ){ + for( auto ev : nuEvents ){ + events.push_back( ev ); + } + } + void eventSet::addEvent( std::vector> nuEvents ){ + for( auto ev : nuEvents ){ + events.push_back( *ev ); + } + } + void eventSet::setComp( eventSetComp nuComp ){ + this->comp = nuComp; + } + bool eventSet::belongs( event& ev ){ + if( this->comp == nullptr ) throw std::runtime_error("No comparison function set for eventSet."); + return this->comp(ev, relStats); + } + bool eventSet::belongs( std::shared_ptr ev ){ + if( this->comp == nullptr ) throw std::runtime_error("No comparison function set for eventSet."); + return this->comp(*ev, relStats); + } + + /* NOTE(review): 'returnEvent' is a function-local static — it is constructed from the particles of the FIRST call only; every later call ignores its argument and returns that same event. Looks like a bug; confirm intent. */ event& makeEv( std::vector>& particles ){ + static auto returnEvent = event( particles ); + return returnEvent; + } + + std::vector> getParticles( event& ev ){ + return ev.getPrts(); + } + + // ZW: struct for handling the first line of + // LHE format init tag + bool lheInitHead::isWritten(){ return written; } + bool lheInitHead::isModded(){ return modded; } + std::shared_ptr lheInitHead::getContent(){ + if( isModded() || !isWritten() ){ writer(); } + return content; } + lheInitHead::lheInitHead( std::string_view initHead ){ + auto vals = *nuBlankSplitter( initHead ); + if( vals.size() < 10 ){ return; } + idbmup[0] = vals[0]; idbmup[1] = vals[1]; + ebmup[0] = vals[2]; ebmup[1] = vals[3]; + pdfgup[0] = vals[4]; pdfgup[1] = vals[5]; + pdfsup[0] = vals[6]; pdfsup[1] = vals[7]; + idwtup = vals[8]; nprup = vals[9]; + } + lheInitHead::lheInitHead( xmlNode& initNode ) + { + if( initNode.getName() != "init" ){ return; } + auto
startPos = initNode.getFile().find( ">", initNode.getStart() ) + 1; + auto endPos = initNode.getFile().find( "\n", startPos ); + auto vals = *nuBlankSplitter( initNode.getFile().substr( startPos, endPos - startPos ) ); + idbmup[0] = vals[0]; idbmup[1] = vals[1]; + ebmup[0] = vals[2]; ebmup[1] = vals[3]; + pdfgup[0] = vals[4]; pdfgup[1] = vals[5]; + pdfsup[0] = vals[6]; pdfsup[1] = vals[7]; + idwtup = vals[8]; nprup = vals[9]; + } + /* NOTE(review): rewrites the ten init-header fields as one line; '*content' assumes the shared_ptr is already allocated — confirm it is initialised at its declaration. */ void lheInitHead::writer(){ + *content = std::string(idbmup[0]) + " " + std::string(idbmup[1]) + " " + std::string(ebmup[0]) + " " + std::string(ebmup[1]) + " " + std::string(pdfgup[0]) + + " " + std::string(pdfgup[1]) + " " + std::string(pdfsup[0]) + " " + std::string(pdfsup[1]) + " " + std::string(idwtup) + " " + std::string(nprup) +"\n"; + written = true; + modded = false; + } + + // ZW: struct for handling process lines + // in LHE format init tag + bool lheInitLine::isWritten(){ return written; } + bool lheInitLine::isModded(){ return modded; } + std::shared_ptr lheInitLine::getContent(){ + if( isModded() || !isWritten() ){ writer(); } + return content; } + lheInitLine::lheInitLine(){} + lheInitLine::lheInitLine( std::string_view procLine ) + { + auto vals = *nuBlankSplitter( procLine ); + if( vals.size() < 4 ){ return; } + xsecup = vals[0]; + xerrup = vals[1]; + xmaxup = vals[2]; + lprup = vals[3]; + } + void lheInitLine::writer(){ + *content = std::string(xsecup) + " " + std::string(xerrup) + " " + std::string(xmaxup) + " " + std::string(lprup) + "\n"; + written = true; + modded = false; + } + + // ZW: struct for handling single parameter line in + // SLHA format parameter card + void paramVal::parse(){ + id = std::stoi( std::string(idStr) ); + value = std::stod( std::string(valStr) ); + } + paramVal::paramVal(){ realLine = ""; idStr = ""; valStr = ""; } + /* REVIEW: paramVal — one 'id value # comment' line of an SLHA block. */ paramVal::paramVal( std::string_view paramLine, bool parseOnline ) + { + if( paramLine.find("\n") != npos ){ + auto startPos = paramLine.find_first_not_of(" \n",
paramLine.find("\n")); + if( startPos!= npos ){ + auto endPos = paramLine.find("\n", startPos); + realLine = paramLine.substr(startPos, endPos - startPos - 1); + } else{ + realLine = paramLine.substr( 0, paramLine.find("\n") - 1 ); + } + } + /* NOTE(review): this unconditional assignment overwrites the trimmed realLine computed in the branch above, making that whole branch dead code; the branch's 'endPos - startPos - 1' also drops the character just before the newline. Both look like bugs — confirm intended behaviour. */ realLine = paramLine; + auto vals = *nuBlankSplitter( realLine ); + idStr = vals[0]; + valStr = vals[1]; + if( parseOnline ){ + if( vals.size() > 2 ) + { + auto comStart = realLine.find("#"); + comStart = realLine.find_first_not_of( " #", comStart ); + comment = realLine.substr( comStart, realLine.find("\n", comStart) - comStart ); + } + parse(); } + } + bool paramVal::isMod(){ return modded; } + /* Writes the line back out: re-formatted (right-padded id, optional '# comment') if modified, verbatim otherwise. */ std::shared_ptr paramVal::selfWrite(){ + auto writeVal = std::make_shared(""); + if( isMod() ) + { + for( int k = idStr.size() ; k < 5 ; ++k ){ *writeVal += " "; } + *writeVal += std::string( idStr ) + " " + std::string( valStr ); + if( comment.size() != 0 ){ + *writeVal += " # " + std::string( comment ); + } + *writeVal += "\n"; + } + else{ *writeVal = std::string( realLine ) + "\n"; } + return writeVal; + } + + // ZW: struct for handling single DECAY line + // in SLHA format parameter card + void decVal::parse() { + auto vals = *nuBlankSplitter( realLine ); + id = std::stoi( std::string(vals[1]) ); + value = std::stod( std::string(vals[2]) ); + if( vals.size() > 3 ) + { + auto comStart = realLine.find("#"); + comment = realLine.substr( comStart, realLine.find("\n", comStart) - comStart ); + } + } + decVal::decVal( std::string_view paramLine, bool parseOnline ) : paramVal( paramLine, false ) + { + if( parseOnline ){ parse(); } + } + std::shared_ptr decVal::selfWrite() { + auto writeVal = std::make_shared(""); + if( isMod() ) + { + *writeVal += "DECAY " + std::string( idStr ) + " " + std::string( valStr ); + if( comment.size() != 0 ){ + *writeVal += " # " + std::string( comment ); + } + *writeVal += "\n"; + } + else{ *writeVal = std::string( realLine ) + "\n"; } + return writeVal; + } + + // ZW: struct for handling parameter block + //
in SLHA format parameter card + /* REVIEW: paramBlock::parse — locates the 'block' keyword, extracts the block name and optional trailing comment, then parses each parameter line. The find() results are used without bounds checks — assumes well-formed SLHA input; confirm. */ void paramBlock::parse( bool parseOnline ){ + if( realBlock.size() == 0 ){ return; } + if( !(clStringComp(realBlock.substr(startPt+1, 5), std::string("block"))) ){ startPt = clStringFind( realBlock, std::string("\nblock") ); } + auto namePt = realBlock.find_first_not_of( " ", startPt + 7 ); + name = realBlock.substr( namePt, realBlock.find_first_of( " \n", namePt ) - namePt ); + if( realBlock.find( " ", namePt ) < realBlock.find( "\n", namePt ) ) + {comment = realBlock.substr( namePt + name.size(), realBlock.find( "\n", namePt ) - namePt - name.size() ); } + auto paramLines = blockLineStractor( realBlock.substr( startPt ) ); + params.reserve( paramLines.size() ); + for( auto line : paramLines ) + { + params.push_back( paramVal( line, parseOnline ) ); + } + } + paramBlock::paramBlock(){ return; } + paramBlock::paramBlock( std::string_view paramSet, bool parseOnline ) + { + realBlock = paramSet; + startPt = clStringFind( realBlock, std::string("\nB") ); + if( parseOnline ){ parse(parseOnline); } + } + bool paramBlock::isMod(){ return modded; } + std::shared_ptr paramBlock::selfWrite(){ + auto writeBlock = std::make_shared(""); + if( isMod() ) + { + *writeBlock += "\nBLOCK " + std::string(name); + if( comment.size() > 0 ){ + *writeBlock += " # " + std::string( comment ); + } + *writeBlock += "\n"; + for ( auto val : params ) + { + *writeBlock += *val.selfWrite(); + } + } + else{ if( startPt == npos ){ + *writeBlock += realBlock; + } else { + *writeBlock = realBlock.substr( startPt ); + } } + return writeBlock; + } + + // ZW: struct for handling DECAY lines + // in SLHA format parameter card + void decBlock::parse( bool parseOnline ){ + if( realBlock.size() == 0 ){ return; } + auto decLines = clFindEach( realBlock, std::string("\ndecay") ); + decays.reserve(decLines->size()); + if( realBlock.size() > 5 ){ if( clStringComp( realBlock.substr(0,5), std::string("decay")) ) + { decays.push_back( decVal(realBlock.substr( 0,
realBlock.find("\n") ), parseOnline) ); } } + for( auto pts : *decLines ) + { + auto lineBr = realBlock.find( "\n", pts + 1 ); + if( lineBr == npos ){ decays.push_back( decVal( realBlock.substr( pts + 1), parseOnline ) ); continue; } + decays.push_back( decVal( realBlock.substr( pts + 1, lineBr - pts - 1 ), parseOnline ) ); + } + } + void decBlock::parse( std::shared_ptr> decLines, bool parseOnline ) { + decays.reserve(decLines->size()); + if( realBlock.size() > 5 ){ if( clStringComp( realBlock.substr(0,5), std::string("decay")) ) + { decays.push_back( decVal(realBlock.substr( 0, realBlock.find("\n") ), parseOnline) ); } } + for( auto pts : *decLines ) + { + auto lineBr = realBlock.find( "\n", pts + 1 ); + if( lineBr == npos ){ decays.push_back( decVal( realBlock.substr( pts + 1), parseOnline ) ); continue; } + decays.push_back( decVal( realBlock.substr( pts + 1, lineBr - pts - 1 ), parseOnline ) ); + } + } + /* NOTE(review): the paramBlock base constructor already ran parse() on the same text when parseOnline is set; parsing again here duplicates work — presumably harmless, confirm. */ decBlock::decBlock( std::string_view paramSet, bool parseOnline ) : paramBlock( paramSet, parseOnline ) + { + realBlock = paramSet; + if( parseOnline ){ parse(parseOnline); } + } + std::shared_ptr decBlock::selfWrite() { + auto writeBlock = std::make_shared(""); + *writeBlock += "\n"; + for ( auto val : decays ) + { + *writeBlock += *val.selfWrite(); + } + return writeBlock; + } + + // ZW: struct for handling SLHA parameter cards + /* NOTE(review): blockPts->at(0)/decLines->at(0) throw std::out_of_range, and 'blockPts->size() - 1' wraps to SIZE_MAX, if the card contains no block or no decay lines — confirm such cards cannot reach this function. */ void lesHouchesCard::parse( bool parseOnline ) + { + if( parsed ){ return; } + if( xmlFile.substr(start,1).find_first_of("BbDd#") == npos ){ start = clStringFindIf( xmlFile, std::string("\n"), lambdaNu ); } + auto blockPts = clFindEach( xmlFile, std::string("\nblock") ); + auto decLines = clFindEach( xmlFile, std::string("\ndecay") ); + header = xmlFile.substr( start, std::min( blockPts->at(0), decLines->at(0) ) - start ); + for( size_t k = 0 ; k < blockPts->size() - 1 ; ++k ) + { + blocks.push_back( paramBlock( xmlFile.substr( blockPts->at(k), blockPts->at(k+1) - blockPts->at(k) ), parseOnline ) ); + } +
blocks.push_back(paramBlock(xmlFile.substr(blockPts->at(blockPts->size()-1), clStringFindIf( xmlFile, std::string("\n"), + lambda, blockPts->at(blockPts->size()-1) + 1) - blockPts->at(blockPts->size()-1)), parseOnline)); + decays = decBlock( xmlFile ); + decays.parse( decLines, parseOnline ); + parsed = true; + } + lesHouchesCard::lesHouchesCard( const std::string_view originFile, const size_t& begin, bool parseOnline ){ + xmlFile = originFile; start = begin; + modded = false; blockStart = clStringFindIf( xmlFile, std::string("\n"), lambda, start + 1); end = xmlFile.find(" lesHouchesCard::selfWrite(){ + auto writeCard = std::make_shared(header); + if( isMod() ) + { for( auto block : blocks ) + { *writeCard += *block.selfWrite(); } + *writeCard += *decays.selfWrite(); } + else{ + if( end != npos ){ *writeCard += std::string( xmlFile.substr( blockStart, end - blockStart ) ); + } else{ *writeCard += std::string( xmlFile.substr( blockStart ) ); } + } + return writeCard; + } + + /* REVIEW: slhaNode — xmlNode wrapper exposing the embedded SLHA card; getParameters() marks the node modified because the caller may edit the returned card. */ std::shared_ptr slhaNode::getParameters(){ + modded = true; + return parameterCard; + } + slhaNode::slhaNode() : xmlNode(){} + slhaNode::slhaNode( lesHouchesCard parameters ) : xmlNode(){ + parameterCard = std::make_shared( parameters ); + pCardInit = true; + } + slhaNode::slhaNode( std::shared_ptr parameters ) : xmlNode(){ + parameterCard = parameters; + pCardInit = true; + } + slhaNode::slhaNode( xmlNode& node, bool parseOnline ) : xmlNode( node ){ + parameterCard = std::make_shared( node.getFile(), node.getStart(), parseOnline ); + } + slhaNode::slhaNode( xmlNode* node, bool parseOnline ) : xmlNode( *node ){ + parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); + } + slhaNode::slhaNode( std::shared_ptr node, bool parseOnline ) : xmlNode( *node ){ + parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); + } + slhaNode::slhaNode( xmlTree tree, bool parseOnline ) : xmlNode( tree ){ + parameterCard = std::make_shared( tree.getOrigin(),
tree.getStart(), parseOnline ); + } + slhaNode::slhaNode( std::shared_ptr tree, bool parseOnline ) : xmlNode( *tree ){ + parameterCard = std::make_shared( tree->getOrigin(), tree->getStart(), parseOnline ); + } + slhaNode::slhaNode( xmlTree* tree, bool parseOnline ) : xmlNode( *tree ){ + parameterCard = std::make_shared( tree->getOrigin(), tree->getStart(), parseOnline ); + } + slhaNode::slhaNode( const std::string_view originFile, const size_t& begin, bool parseOnline ) + : xmlNode( originFile, begin ){ + if( parse() ){ parameterCard = std::make_shared( content, begin, parseOnline ); pCardInit = true; } + } + void slhaNode::headWriter(){ + nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + nodeHeader += ">"; + } + void slhaNode::endWriter(){ nodeEnd += "\n"; } + void slhaNode::contWriter(){ + if( pCardInit ){ + nodeContent = *parameterCard->selfWrite(); + } else { + nodeContent = content; + } + } + + // ZW: struct for handling LHE init nodes + std::shared_ptr initNode::getHead(){ return initHead; } + std::vector> initNode::getLines(){ return initLines; } + void initNode::setHead( std::shared_ptr head ){ modded = true; initHead = head; } + /* The setters keep the header's nprup (process-line count) in sync with the stored lines. */ void initNode::setLines( std::vector> lines ){ modded = true; initLines = lines; initHead->nprup = std::to_string( initLines.size() ); } + void initNode::addLine( std::shared_ptr line ){ modded = true; initLines.push_back( line ); initHead->nprup = std::to_string( initLines.size() ); } + initNode::initNode() : xmlNode(){ name = "init"; } + initNode::initNode( const std::string_view originFile, const size_t& begin, bool parseOnline ) + : xmlNode( originFile, begin ){ + content = originFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } + } + initNode::initNode( xmlNode& node, bool parseOnline ) : xmlNode( node ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if(
parseOnline ){ parse( parseOnline ); } + } + initNode::initNode( xmlNode* node, bool parseOnline ) : xmlNode( *node ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } + } + initNode::initNode( std::shared_ptr node, bool parseOnline ) : xmlNode( *node ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } + } + initNode::initNode( xmlTree tree, bool parseOnline ) : xmlNode( tree ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } + } + initNode::initNode( std::shared_ptr tree, bool parseOnline ) : xmlNode( *tree ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } + } + initNode::initNode( xmlTree* tree, bool parseOnline ) : xmlNode( *tree ){ + content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } + } + /* NOTE(review): the loop stops at the last line break, so any text after the final '\n' is never stored as an init line — correct only if content always ends in a newline; confirm. */ bool initNode::parseContent(){ + if( content.size() == 0 ){ return false; } + auto linebreaks = lineFinder( content ); + if( linebreaks->size() == 0 ){ return false; } + initHead = std::make_shared(content.substr( 0, linebreaks->at(0) ) ); + for( size_t k = 0 ; k < linebreaks->size() - 1 ; ++k ){ + initLines.push_back( std::make_shared( content.substr( linebreaks->at(k), linebreaks->at(k+1) - linebreaks->at(k) ) ) ); + } + return true; + } + /* NOTE(review): this condition looks inverted — when the node IS modified it writes the untouched raw 'content' and discards the edited initHead/initLines; expected '!isModded()'. Confirm. */ void initNode::contWriter(){ + if( isModded() ){nodeContent = std::string( content ); return; } + nodeContent = *initHead->getContent(); + for( auto line : initLines ){ + nodeContent += *line->getContent(); + } + } + + // ZW: struct for explicitly handling LHE header nodes + size_t lheHead::addWgtGroup( std::shared_ptr& wgtGroup ){ +
hasRwgt = true; + modded = true; + if( wgtGrpInit( wgtGroup ) ){ + rwgtNodes->addGroup( wgtGroup ); + } + return (rwgtNodes->noGrps() - 1); + } + size_t lheHead::addWgtGroup( weightGroup wgtGroup ){ + hasRwgt = true; + modded = true; + auto wgtGrpPtr = std::make_shared( wgtGroup ); + if( wgtGrpInit( wgtGrpPtr ) ){ + rwgtNodes->addGroup( std::make_shared( wgtGroup ) ); + } + return (rwgtNodes->noGrps() - 1); + } + /* The addWgt overloads validate the target weight-group index before appending; the idTagg variants also retag the weight. */ void lheHead::addWgt( size_t index, std::shared_ptr nuWgt ){ + if( index >= (size_t)rwgtNodes->getGroups().size() ) + throw std::range_error( "Appending weight to uninitialised weightgroup." ); + hasRwgt = true; + modded = true; + rwgtNodes->addWgt( index, nuWgt ); + } + void lheHead::addWgt( size_t index, headWeight nuWgt ){ + if( index >= (size_t)rwgtNodes->getGroups().size() ) + throw std::range_error( "Appending weight to uninitialised weightgroup." ); + hasRwgt = true; + modded = true; + rwgtNodes->addWgt( index, nuWgt ); + } + void lheHead::addWgt( size_t index, std::shared_ptr nuWgt, std::string idTagg ){ + if( index >= (size_t)rwgtNodes->getGroups().size() ) + throw std::range_error( "Appending weight to uninitialised weightgroup." ); + hasRwgt = true; + modded = true; + nuWgt->setId( idTagg ); + rwgtNodes->addWgt( index, nuWgt ); + } + void lheHead::addWgt( size_t index, headWeight nuWgt, std::string idTagg ){ + if( index >= (size_t)rwgtNodes->getGroups().size() ) + throw std::range_error( "Appending weight to uninitialised weightgroup."
); + hasRwgt = true; + modded = true; + nuWgt.setId( idTagg ); + rwgtNodes->addWgt( index, nuWgt ); + } + void lheHead::setInitRwgt( initRwgt initWgt ){ hasRwgt = true; modded = true; rwgtNodes = std::make_shared(initWgt); } + void lheHead::setInitRwgt( std::shared_ptr initWgt ){ hasRwgt = true; modded = true; rwgtNodes = initWgt; } + std::vector> lheHead::getWgtGroups(){ return rwgtNodes->getGroups(); } + std::shared_ptr lheHead::getInitRwgt(){ return rwgtNodes; } + std::shared_ptr lheHead::getParameters(){ return parameters; } + void lheHead::setParameters( std::shared_ptr params ){ parameters = params; } + bool lheHead::rwgtInc(){ return hasRwgt; } + lheHead::lheHead(){ return; } + /* REVIEW: the constructors promote recognised children ('slha', 'initrwgt') to typed members; all other children stay in the generic child list. */ lheHead::lheHead( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) + : xmlNode(originFile, begin, childs){ + xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); + if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} + for( auto child : children ){ + if (child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if (child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead::lheHead( xmlNode& node ) : xmlNode(node){ + for( auto child : node.getChildren() ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead::lheHead( xmlNode* node ) : xmlNode(*node){ + for( auto child : node->getChildren() ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead::lheHead( std::shared_ptr node ) : xmlNode( *node ){ + for( auto child : node->getChildren() ){ + if (
child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead::lheHead( xmlTree tree ) : xmlNode( tree ){ + for( auto child : children ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead::lheHead( std::shared_ptr tree ) : xmlNode( *tree ){ + for( auto child : children ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + lheHead::lheHead( xmlTree* tree ) : xmlNode( *tree ){ + for( auto child : children ){ + if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } + if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } + } + } + /* NOTE(review): returns true only when the group store was already initialised by this path; throws whenever rwgtNodes is non-null but wgtGrpIsInit is false — e.g. when an initrwgt node was picked up from a parsed file by the constructors above. Confirm that path can never call this. */ bool lheHead::wgtGrpInit( std::shared_ptr& wgtGrp ){ + if( wgtGrpIsInit ){ return true; } + if( rwgtNodes == nullptr ){ + rwgtNodes = std::make_shared(); + wgtGrpIsInit = true; + rwgtNodes->addGroup( wgtGrp ); + return false; + } else throw std::runtime_error( "Error while initiating return LHE file header (initrwgt node is defined in an unrecognised manner)."
); + } + /* setRelChild caches the indices of children other than slha/initrwgt, which are written out separately by childWriter. */ void lheHead::setRelChild(){ + if( relChildSet ){ return; } + relChild.reserve( children.size() ); + for( size_t k = 0 ; k < children.size() ; ++k ){ + auto child = &children[k]; + if( (*child)->getName() == "slha" ){ continue; } + if( (*child)->getName() == "initrwgt" ){ continue; } + relChild.push_back( k ); + } + relChildSet = true; + } + bool lheHead::parseChildren( bool recursive ){ + bool status = true; + for( auto child : children ){ + if( child->getName() == "slha" || child->getName() == "initrwgt" ){ continue; } + child->parser( recursive ); + status = (status && child->isParsed() ); + deepParsed = true; + } + return status; + } + void lheHead::headWriter(){ + nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; + } + nodeHeader += ">\n"; + } + void lheHead::childWriter(){ + setRelChild(); + for( auto relKid : relChild ){ + nodeContent += *(children[relKid]->nodeWriter()); + } + if( parameters != nullptr ){ nodeContent += *parameters->nodeWriter(); } + if( hasRwgt ){ + nodeContent += *rwgtNodes->nodeWriter(); + } + } + void lheHead::fullWriter(){ + if( isModded() ){ + headWriter(); + contWriter(); + childWriter(); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + written = true; + } + } + + // ZW: struct for keeping track of appended weights in LHE node, + // since weight information is stored both in the header + // and in the individual events + newWgt::newWgt( std::shared_ptr heaWgt, std::vector> bodWgts ){ + headWgt = heaWgt; bodyWgts = bodWgts; + } + newWgt::newWgt( std::shared_ptr heaWgt, std::shared_ptr> wgts ){ + headWgt = heaWgt; + bodyWgts = std::vector>(wgts->size()); + auto idTag = std::string(headWgt->getTag()); + if( idTag != "" ){ + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i), idTag); + } + } else{ + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i)); + } + } + } + newWgt::newWgt(
std::string_view parameters, std::shared_ptr> wgts, std::string idTag ){ + headWgt = std::make_shared(parameters, idTag); + bodyWgts = std::vector>(wgts->size()); + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i), idTag); + } + } + newWgt::newWgt( std::string_view parameters, int idNum, std::shared_ptr> wgts, std::string idTag ){ + std::string newTag = std::string( idTag ) + "_" + std::to_string( idNum ); + headWgt = std::make_shared(parameters, newTag); + bodyWgts = std::vector>(wgts->size()); + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i), newTag); + } + } + newWgt::newWgt( std::string& parameters ){ + headWgt = std::make_shared(parameters); + } + newWgt::newWgt( std::string& parameters, std::string& idTag ){ + headWgt = std::make_shared(parameters, idTag); + } + std::shared_ptr newWgt::getHeadWgt(){ return headWgt; } + std::vector> newWgt::getBodyWgts(){ return bodyWgts; } + /* NOTE(review): assigns bodyWgts[i] without resizing first — out-of-range write (UB) whenever bodyWgts.size() < wgts->size(), e.g. on a newWgt built from the header-only constructors above. Should resize/assign; confirm. */ void newWgt::addBdyWgts( std::shared_ptr> wgts ){ + auto idTag = std::string(headWgt->getTag()); + if( idTag != "" ){ + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i), idTag); + } + } else{ + for( size_t i = 0 ; i < wgts->size() ; ++i ){ + bodyWgts[i] = std::make_shared(wgts->at(i)); + } + } + } + + // ZW: general struct for handling LHE files explicitly + lheNode::lheNode() : xmlNode(){} + lheNode::lheNode( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) + : xmlNode(originFile, begin, childs){ + //xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); + //if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} + for( auto child : children ){ + if( child->getName() == "header" ){ header = std::make_shared( *child ); continue; } + if( child->getName() == "init" ){ init = std::make_shared( *child );
continue; } + if( child->getName() == "event" ){ events.push_back( std::make_shared( *child ) ); continue; } + } + } + std::shared_ptr lheNode::getHeader(){ return header; } + std::shared_ptr lheNode::getInit(){ return init; } + std::vector> lheNode::getEvents(){ return events; } + bool lheNode::isModded(){ return modded; } + bool lheNode::isModded( bool deep ){ + if( !deep ){ return isModded(); } + bool modStat = isModded(); + for( auto child : children ){ modStat = ( modStat || child->isModded( deep ) ); } + for( auto event : events ){ modStat = ( modStat || event->isModded( deep ) ); } + return modStat; + } + void lheNode::setInit( std::shared_ptr initNod ){ init = initNod; } + void lheNode::setHeader( std::shared_ptr headNod ){ header = headNod; } + /* NOTE(review): assumes one body weight per event in file order and does not bounds-check events[k] — UB if wgtsVec.size() > events.size(); confirm callers guarantee the sizes match. */ void lheNode::addWgt( size_t index, newWgt& addedWgt ){ + header->addWgt( index, addedWgt.getHeadWgt() ); + auto wgtsVec = addedWgt.getBodyWgts(); + for( size_t k = 0 ; k < wgtsVec.size() ; ++k ){ + events[k]->addWgt( wgtsVec[k] ); + } + } + void lheNode::addWgt( size_t index, newWgt& addedWgt, std::string& idTag ){ + header->addWgt( index, addedWgt.getHeadWgt(), idTag ); + auto wgtsVec = addedWgt.getBodyWgts(); + for( size_t k = 0 ; k < wgtsVec.size() ; ++k ){ + events[k]->addWgt( wgtsVec[k] ); + } + } + void lheNode::setRelStats( std::vector& particles ){ + relStat = particles; + } + std::vector& lheNode::getRelStats(){ + return relStat; + } + void lheNode::setSameSort( sortFcn& sortF ){ + particleSort = sortF; + } + sortFcn& lheNode::getSameSort(){ + return particleSort; + } + void lheNode::setStatSort( statSort& statS ){ + statParticleSort = statS; + } + statSort& lheNode::getStatSort(){ + return statParticleSort; + } + void lheNode::headerWriter(){ + nodeContent += "\n" + *header->nodeWriter(); + } + void lheNode::initWriter(){ + nodeContent += *init->nodeWriter(); + } + void lheNode::eventWriter(){ + for( auto event : events ){ + nodeContent += *event->nodeWriter(); + } + } + void lheNode::contWriter(){ +
nodeContent = ""; + headerWriter(); + initWriter(); + eventWriter(); + } + void lheNode::fullWriter(){ + if( isModded( true ) ){ + headWriter(); + contWriter(); + endWriter(); + writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); + written = true; + modded = false; + } else if( !isWritten() ){ + writtenSelf = std::make_shared( xmlFile.substr(start, end - start ) ); + written = true; + } + } + std::shared_ptr lheNode::nodeWriter() { + if( isModded( true ) || !isWritten() ){ fullWriter(); } + return writtenSelf; + } + + // ZW: function for extracting event information from + // LHE files + std::vector>> valExtraction( lheNode& lheFile ) + { + /* NOTE(review): getGs is hard-coded true, so the 'else' branch below is dead code; also events[0] is read unchecked, so an LHE file with no events crashes here. Confirm whether getGs was meant to be a parameter. */ bool getGs = true; + auto momVec = std::make_shared>(); + auto wgtVec = std::make_shared>(); + auto gVec = std::make_shared>(); + auto events = lheFile.getEvents(); + momVec->reserve( events.size() * 4 * std::stoi(std::string(events[0]->getHead().getNprt())) ); + wgtVec->reserve( events.size() ); + gVec->reserve( events.size() ); + if( getGs ){ + for( auto event : events ) + { + wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); + gVec->push_back( std::sqrt( 4.0 * M_PI * std::stod(std::string( event->getHead().getAQCD() )))); + for( auto prt : event->getPrts() ) + { + momVec->push_back(std::stod(std::string(prt->getE()))); + for( int p = 0 ; p < 3 ; ++p ) + { momVec->push_back(std::stod(std::string(prt->getMom()[p]))); } + } + } + } else{ + for( auto event : events ) + { + wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); + gVec->push_back( std::stod(std::string( event->getHead().getAQCD() ))); + for( auto prt : event->getPrts() ) + { + momVec->push_back(std::stod(std::string(prt->getE()))); + for( int p = 0 ; p < 3 ; ++p ) + { momVec->push_back(std::stod(std::string(prt->getMom()[p]))); } + } + + } } + return {momVec, gVec, wgtVec}; + } + + // ZW: fcn for parsing an LHE format event block + // and return a REX format event object + std::shared_ptr evPtrParsor(
std::string_view parseFile, size_t& initPos, size_t& endPos ) + { + /* NOTE(review): termination of both while-loops relies on xmlPtrParser/xmlTagParser advancing initPos/equalSign through their reference parameters — confirm they do, otherwise these loops never exit. */ auto currNode = std::make_shared(parseFile, initPos); + initPos = nodeStartFind( parseFile, initPos + 1 ); + while( initPos < endPos ) + { + currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); + } + size_t equalSign = parseFile.find_first_of("=>", initPos); + size_t nodeInitEnd = parseFile.find(">", initPos); + while( equalSign < nodeInitEnd ){ + currNode->addTag( xmlTagParser(parseFile, equalSign) ); + } + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, endPos + 1 ); + return currNode; + } + + // ZW: fcn for parsing an LHE format header + // and return a REX format lheHead object + lheHeadParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) + { + auto currNode = std::make_shared(parseFile, initPos); + initPos = nodeStartFind( parseFile, initPos + 1 ); + while( initPos < endPos ) + { + currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); + if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "init" ){ continue; } + if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "slha" ){ + currNode->setParameters( std::make_shared(currNode->getChildren()[ currNode->getChildren().size() - 1 ]) ); + } + if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "initrwgt" ){ + currNode->setInitRwgt( std::make_shared( currNode->getChildren()[ currNode->getChildren().size() - 1 ] ) ); + } + } + size_t equalSign = parseFile.find("=", initPos); + size_t nodeInitEnd = parseFile.find(">", initPos); + while( equalSign < nodeInitEnd ){ + currNode->addTag( xmlTagParser(parseFile, equalSign) ); + } + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, endPos + 1 ); + return currNode; + } + + // ZW: fcn for parsing an LHE format file + // and return a REX format LHE node object + std::shared_ptr lheParser( std::string_view
parseFile, size_t& initPos, size_t& endPos ) + { + auto currNode = std::make_shared(parseFile, initPos); + initPos = nodeStartFind( parseFile, initPos + 1 ); + while( initPos < endPos ) + { + if( parseFile.substr( initPos, 6 ) == "getEvents().push_back( evPtrParsor( parseFile, initPos, endPos ) ); + continue; + } else if( parseFile.substr( initPos, 7 ) == "setHeader(lheHeadParser( parseFile, initPos, endPos )); + continue; + } else if( parseFile.substr( initPos, 5 ) == "setInit( std::make_shared( parseFile, initPos ) ); + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, nodeEndFind( parseFile, endPos + 1 ) + 1); + continue; + } else { + currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); + } + } + size_t equalSign = parseFile.find("=", initPos); + size_t nodeInitEnd = parseFile.find(">", initPos); + while( equalSign < nodeInitEnd ){ + currNode->addTag( xmlTagParser(parseFile, equalSign) ); + } + initPos = nodeStartFind( parseFile, endPos ); + endPos = nodeEndFind( parseFile, endPos + 1 ); + return currNode; + } + + // ZW: struct for treating individual HEP + // processes, formatted based on PDG codes + // and the LHE particle status standard + struct lheProc { + public: + std::vector minusOne; + std::vector plusOne; + std::vector minusTwo; + std::vector plusTwo; + std::vector plusThree; + std::vector minusNine; + std::vector orderMOne; + std::vector orderOne; + std::vector orderMTwo; + std::vector orderTwo; + std::vector orderThree; + std::vector orderNine; + std::map> valVecs{{"-1", minusOne}, {"1", plusOne}, {"-2", minusTwo}, {"2", plusTwo}, {"3", plusThree}, {"-9", minusNine}}; + std::map> orderVecs{{"-1", orderMOne}, {"1", orderOne}, {"-2", orderMTwo}, {"2", orderTwo}, {"3", orderThree}, {"9",orderNine}}; + /* NOTE(review): orderVecs is keyed "9" where valVecs uses "-9"; the loop below accesses orderVecs via operator[], which default-inserts a fresh "-9" entry, so the "9"/orderNine slot is never used — looks like a typo for "-9". Confirm. */ lheProc( event& eventNode ) + { + for( auto prt : eventNode.getPrts() ) + { + valVecs[prt->getStatus()].push_back(prt->getPDG()); + } + for( auto valVec = valVecs.begin() ; valVec!= valVecs.end() ; ++valVec ){ +
if( valVec->second.size() == 0 ){ continue; } + orderVecs[valVec->first] = *stoiSort( valVec->second ); + } + } + std::shared_ptr writer(){ + auto written = std::make_shared(); + for( auto inits : valVecs["-1"] ){ + written->append(inits); + written->append(" "); + } + if( valVecs["2"].size() > 0 ){ + written->append("> "); + for( auto inits : valVecs["2"] ){ + written->append(inits); + written->append(" "); + } + } + written->append("> "); + for( auto inits : valVecs["1"] ){ + written->append(inits); + written->append(" "); + } + return written; + } + }; + + // ZW: fcn for uploading text files to the program + std::shared_ptr filePuller( const std::string& fileLoc ) + { + /* NOTE(review): no is_open() check — a missing/unreadable file silently yields an empty string; confirm callers handle that. */ std::ifstream fileLoad( fileLoc ); + std::stringstream buffer; + buffer << fileLoad.rdbuf(); + auto fileContent = std::make_shared(buffer.str()); + //std::transform( fileContent->begin(), fileContent->end(), fileContent->begin(), ::tolower ); + buffer.str(std::string()); + fileLoad.close(); + return fileContent; + } + + // ZW: fcn for saving std::string to disk + bool filePusher( std::string fileLoc, std::string fileCont ) + { + std::ofstream fileWrite( fileLoc ); + if(!fileWrite){return false;} + fileWrite << fileCont; + fileWrite.close(); + return true; + } + + // ZW: fcn for extracting the full + // process information from an LHE event + std::shared_ptr>> pdgXtract( event& currEv ) + { + auto currProc = std::make_shared>>(); + auto &useProc = *currProc; + for( auto prt : currEv.getPrts() ) + { + useProc[ prt->getStatus() ].push_back(prt->getPDG()); + } + return currProc; + } + + /* chaoticVecComp: compares two vectors element-wise through their sort-order index maps; note the order vectors are taken by value, copying them on every call. */ template + bool chaoticVecComp( const std::vector& vec1, const std::vector order1, const std::vector& vec2, const std::vector order2 ) + { + if( vec1.size()!= vec2.size() ){ return false; } + for( size_t i = 0; i < vec1.size(); i++ ){ + if( vec1[order1[i]]!= vec2[order2[i]] ){ return false; } + } + return true; + } + + // ZW: fcn for comparing two processes in the + // format output by pdgXtract + bool sameProcString(
std::map>& firstVec, std::map>& secVec, const std::vector& statVec ) + { + /* NOTE(review): operator[] on the non-const maps default-inserts empty vectors for missing status keys, mutating both inputs — harmless for the comparison result but surprising; confirm. */ if( firstVec.size() != secVec.size() ){return false;} + for(auto code : statVec ) + { + if( firstVec[code] != secVec[code] ){ return false; } + } + return true; + } + + bool sameProcString( std::map>& firstVec, std::map>& firstOrder, + std::map>& secVec, std::map>& secondOrder, + std::vector& statVec ) + { + if( firstVec.size() != secVec.size() ){return false;} + for(auto code : statVec ) + { + if( !chaoticVecComp(firstVec[code], firstOrder[code], secVec[code], secondOrder[code]) ){ return false; } + } + return true; + } + + // ZW: fcn for processes in the lheProc struct format + bool procComp( lheProc& firstProc, lheProc& secProc, std::vector statVec ) + { + for( auto stat : statVec ) + { + if( firstProc.valVecs.at(stat).size() != secProc.valVecs.at(stat).size() ){ return false; } + if( !chaoticVecComp( firstProc.valVecs[stat], firstProc.orderVecs[stat], secProc.valVecs[stat], secProc.orderVecs[stat] ) ){ return false; } + } + return true; + } + + bool evProcComp( event& firstEv, event& secEv, std::vector statVec = {"-1", "1"} ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc()[stat].size()!= secEv.getProc()[stat].size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc()[stat], firstEv.getProcOrder()[stat], + secEv.getProc()[stat], secEv.getProcOrder()[stat] ) ){ return false; } + } + return true; + } + + bool evProcComp( event& firstEv, event& secEv, std::vector statVec, + sortFcn sorter ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc(sorter)[stat].size()!= secEv.getProc(sorter)[stat].size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc(sorter)[stat], firstEv.getProcOrder(sorter)[stat], + secEv.getProc(sorter)[stat], secEv.getProcOrder(sorter)[stat] ) ){ return false; } + } + return true; + } + + bool evProcComp( event& firstEv, event& secEv, std::vector statVec, + statSort sorter ) + { + for( auto stat : statVec ) + { + if(
firstEv.getProc(sorter)[stat].size()!= secEv.getProc(sorter)[stat].size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc(sorter)[stat], firstEv.getProcOrder(sorter)[stat], + secEv.getProc(sorter)[stat], secEv.getProcOrder(sorter)[stat] ) ){ return false; } + } + return true; + } + + bool evProcComp( const event& firstEv, const event& secEv, std::vector statVec = {"-1", "1"} ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc().at(stat).size()!= secEv.getProc().at(stat).size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc().at(stat), firstEv.getProcOrder().at(stat), + secEv.getProc().at(stat), secEv.getProcOrder().at(stat) ) ){ return false; } + } + return true; + } + + /* NOTE(review): the two const overloads below accept a 'sorter' but never use it — they call getProc()/getProcOrder() without the sorter, unlike the non-const overloads above. Looks like an oversight; confirm. */ bool evProcComp( const event& firstEv, const event& secEv, std::vector statVec, + sortFcn sorter ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc().at(stat).size()!= secEv.getProc().at(stat).size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc().at(stat), firstEv.getProcOrder().at(stat), + secEv.getProc().at(stat), secEv.getProcOrder().at(stat) ) ){ return false; } + } + return true; + } + + bool evProcComp( const event& firstEv, const event& secEv, std::vector statVec, + statSort sorter ) + { + for( auto stat : statVec ) + { + if( firstEv.getProc().at(stat).size()!= secEv.getProc().at(stat).size() ){ return false; } + if(!chaoticVecComp( firstEv.getProc().at(stat), firstEv.getProcOrder().at(stat), + secEv.getProc().at(stat), secEv.getProcOrder().at(stat) ) ){ return false; } + } + return true; + } + + bool eventComp::operator()( event& firstEv, event& secEv){ + if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getStatSort());} + else {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getSortFcn() );} + } + bool eventComp::operator()( const event& firstEv, const event& secEv) const { + if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getStatSort());} + else {return
evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getSortFcn() );} + } + bool eventComp::operator()(event& firstEv, event& secEv, std::vector statVec){ + if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, statVec, firstEv.getStatSort());} + else {return evProcComp( firstEv, secEv, statVec, firstEv.getSortFcn() );} + } + + // ZW: fcn for checking whether a list of pdgXtract format + // processes sourceProcList contains a given process newProc + bool procVecContains( std::vector>>>& sourceProcList, + std::map>& newProc, const std::vector& statVec ) + {\ + /* NOTE(review): stray '\' line-continuation after the opening brace above — spliced away by the preprocessor so it is harmless, but it should be deleted. */ for( auto proc : sourceProcList ) + { + if( sameProcString( *proc, newProc, statVec ) ){ return true; } + } + return false; + } + + // ZW: fcn for checking whether a vector of lheProc structs + // procList contains a given lheProc nuProc + bool procListComp( const std::vector>& procList, lheProc& nuProc, std::vector statVec ) + { + if( procList.size() != 0 ){ + for(auto proc : procList ) + { + if( procComp( *proc, nuProc, statVec ) ){ return true; } + } + } + return false; + } + + bool evProcListComp( std::vector>& procList, event& nuEv, std::vector statVec ) + { + if( procList.size()!= 0 ){ + for( auto ev : procList ) + { + if( evProcComp( *ev, nuEv, statVec ) ){ return true; } + } + } + return false; + } + + bool evProcListComp( std::vector>& procList, event& nuEv, std::vector statVec, + sortFcn sorter ) + { + if( procList.size()!= 0 ){ + for( auto ev : procList ) + { + if( evProcComp( *ev, nuEv, statVec, sorter ) ){ return true; } + } + } + return false; + } + + bool evProcListComp( std::vector>& procList, event& nuEv, std::vector statVec, + statSort sorter ) + { + if( procList.size()!= 0 ){ + for( auto ev : procList ) + { + if( evProcComp( *ev, nuEv, statVec, sorter ) ){ return true; } + } + } + return false; + } + + // ZW: fcn for extracting the different processes + // in a given REX format LHE file in the pdgXtract format + std::vector>>> procExtractor( lheNode& lheFile ) + { + std::vector>>>
procList; + const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + for( auto event : lheFile.getEvents() ) + { + auto currProc = pdgXtract( *event ); + if( procVecContains( procList, *currProc, statVec ) ){ continue; } + procList.push_back(currProc); + } + return procList; + } + + // ZW: fcn for extracting the different processes + // in a given REX format LHE file in the lheProc format + std::vector> processPull( lheNode& lheFile, + std::vector statVec = { "-1", "1" } ) + { + //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector> procsList{}; + for( auto event : lheFile.getEvents() ) + { + auto currProc = std::make_shared( *event ); + if( procListComp( procsList, *currProc, statVec ) ){ continue; } + procsList.push_back( currProc ); + } + return procsList; + } + + std::vector> evProcessPull( lheNode& lheFile, std::vector statVec = { "-1", "1" } ) + { + //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector> procsList{}; + for( auto currEv : lheFile.getEvents() ) + { + if( evProcListComp( procsList, *currEv, statVec ) ){ continue; } + procsList.push_back( currEv ); + } + return procsList; + } + + std::vector> evProcessPull( lheNode& lheFile, + sortFcn sorter, + std::vector statVec = { "-1", "1" }) + { + //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector> procsList{}; + lheFile.setSameSort(sorter); + for( auto currEv : lheFile.getEvents() ) + { + if( evProcListComp( procsList, *currEv, statVec, sorter ) ){ continue; } + procsList.push_back( currEv ); + } + return procsList; + } + + std::vector> evProcessPull( lheNode& lheFile, + statSort sorter, + std::vector statVec = { "-1", "1" }) + { + //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector> procsList{}; + lheFile.setStatSort(sorter); + for( auto currEv : lheFile.getEvents() ) + { + if( evProcListComp( procsList, *currEv, statVec, sorter ) ){ continue; } + 
procsList.push_back( currEv ); + } + return procsList; + } + + // ZW: fcn for keeping track of subprocess ordering + // in LHE file + size_t procPos( const std::vector>& evtSet, lheProc& currProc, + std::vector& statVec ) + { + for( size_t k = 0 ; k < evtSet.size() ; ++k ) + { + for( auto stat : statVec ) + { + if( evtSet[k]->valVecs[stat] != currProc.valVecs[stat] ){ break; } + } + return k; + } + return evtSet.size(); + } + + size_t evProcPos( const std::vector>& evtSet, event& currEv, + std::vector statVec = { "-1", "1" } ) + { + for( size_t k = 0 ; k < evtSet.size() ; ++k ) + { + if( evProcComp(*evtSet[k], currEv, statVec) ){ return k; } + } + return evtSet.size(); + } + + size_t evProcPos( const std::vector>& evtSet, event& currEv, + sortFcn sorter, std::vector statVec = {"-1", "1"} ) + { + for( size_t k = 0 ; k < evtSet.size() ; ++k ) + { + if( evProcComp(*evtSet[k], currEv, statVec, sorter) ){ return k; } + } + return evtSet.size(); + } + + size_t evProcPos( const std::vector>& evtSet, event& currEv, + statSort sorter, std::vector statVec = {"-1", "1"} ) + { + for( size_t k = 0 ; k < evtSet.size() ; ++k ) + { + if( evProcComp(*evtSet[k], currEv, statVec, sorter) ){ return k; } + } + return evtSet.size(); + } + + // ZW: fcn for extracting the subprocess ordering + // of LHE file + std::vector>> procOrder( lheNode& lheFile, const std::vector>& evtSet, + std::vector statVec = { "-1", "1" } ) + { + //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; + std::vector>> eventBools( evtSet.size(), std::make_shared> ( lheFile.getEvents().size() )); + //std::vector> pracBools( evtSet.size(), std::vector ( lheFile.getEvents().size() )); + for( auto boolSets : eventBools ){ + std::fill( boolSets->begin(), boolSets->end(), false ); + } + for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) + { + auto currProc = lheProc(*lheFile.getEvents()[k]); + eventBools[ procPos(evtSet, currProc, statVec) ]->at( k ) = true; + } + //for( size_t k = 0 ; k < 
eventBools.size() ; ++k ) + //{ + // eventBools[k] = std::make_shared>( pracBools[k] ); + //} + return eventBools; + } + + std::vector>> evProcOrder( lheNode& lheFile, const std::vector>& evtSet, + std::vector statVec = { "-1", "1" } ) + { + std::vector>> eventBools; + eventBools.reserve(evtSet.size()); + for (size_t i = 0; i < evtSet.size(); ++i) { + eventBools.push_back(std::make_shared>(lheFile.getEvents().size(), false)); + } + for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) + { + eventBools[ evProcPos(evtSet, *lheFile.getEvents()[k], statVec) ]->at( k ) = true; + } + return eventBools; + } + + std::vector>> evProcOrder( lheNode& lheFile, const std::vector>& evtSet, + sortFcn sorter, + std::vector statVec = { "-1", "1" } ) + { + std::vector>> eventBools; + eventBools.reserve(evtSet.size()); + for (size_t i = 0; i < evtSet.size(); ++i) { + eventBools.push_back(std::make_shared>(lheFile.getEvents().size(), false)); + } + for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) + { + eventBools[ evProcPos(evtSet, *lheFile.getEvents()[k], sorter, statVec) ]->at( k ) = true; + } + return eventBools; + } + + std::vector>> evProcOrder( lheNode& lheFile, const std::vector>& evtSet, + statSort sorter, + std::vector statVec = { "-1", "1" } ) + { + std::vector>> eventBools; + eventBools.reserve(evtSet.size()); + for (size_t i = 0; i < evtSet.size(); ++i) { + eventBools.push_back(std::make_shared>(lheFile.getEvents().size(), false)); + } + for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) + { + eventBools[ evProcPos(evtSet, *lheFile.getEvents()[k], sorter, statVec) ]->at( k ) = true; + } + return eventBools; + } + + std::vector>> evProcOrder( lheNode& lheFile, std::vector& evSet ){ + std::vector>> eventBools; + eventBools.reserve(evSet.size()); + for (size_t i = 0; i < evSet.size(); ++i) { + eventBools.push_back(std::make_shared>(lheFile.getEvents().size(), false)); + } + for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) + { + for( size_t i = 
0 ; i < evSet.size() ; ++i ) + { + if( evSet[i].belongs( lheFile.getEvents()[k] ) ) + { + eventBools[i]->at(k) = true; + break; + } + } + } + return eventBools; + } + + // ZW: fcn for reordering LHE file based on subprocess + std::shared_ptr>> eventReOrder( lheNode& lheFile, std::vector relProc ) + { + auto reOrdered = std::make_shared>>(); + reOrdered->reserve( std::count( relProc.begin(), relProc.end(), true ) ); + for( size_t k = 0 ; k < relProc.size() ; ++k ) + { + if(!relProc[k]){continue;} + reOrdered->push_back( lheFile.getEvents()[k] ); + } + return reOrdered; + } + + // ZW: wrapper for eventReOrder + std::vector>>> lheReOrder( lheNode& lheFile, + std::vector statVec = { "-1", "1" } ) + { + auto procSets = processPull( lheFile, statVec ); + auto relProcs = procOrder( lheFile, procSets, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + std::vector statVec = { "-1", "1" } ) + { + auto procSets = evProcessPull( lheFile, statVec ); + auto relProcs = evProcOrder( lheFile, procSets, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + std::vector> procSets, std::vector>> relProcs, + std::vector statVec = { "-1", "1" } ) + { + //auto procSets = evProcessPull( lheFile, statVec ); + //auto relProcs = evProcOrder( lheFile, procSets, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + sortFcn sorter, + std::vector statVec = { "-1", "1" } ) + { + auto procSets = evProcessPull( lheFile, sorter, statVec 
); + auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + std::vector> procSets, std::vector>> relProcs, + sortFcn sorter, std::vector statVec = { "-1", "1" } ) + { + //auto procSets = evProcessPull( lheFile, sorter, statVec ); + //auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + statSort sorter, + std::vector statVec = { "-1", "1" } ) + { + auto procSets = evProcessPull( lheFile, sorter, statVec ); + auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + std::vector>>> lheEvReOrder( lheNode& lheFile, + std::vector> procSets, std::vector>> relProcs, + statSort sorter, std::vector statVec = { "-1", "1" } ) + { + //auto procSets = evProcessPull( lheFile, sorter, statVec ); + //auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + std::vector>>> ordProcs(procSets.size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ) + { + ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); + } + return ordProcs; + } + + // ZW: transposed event information struct + evtInfo::evtInfo( const std::vector>& lheFile ){ + int nEvt = lheFile.size(); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); nprts.reserve(nEvt); procIDs.reserve(nEvt); + for( auto evt : lheFile ) + { + wgts.push_back(evt->getHead().getWeight()); + 
scales.push_back(evt->getHead().getScale()); + aQEDs.push_back(evt->getHead().getAQED()); + aQCDs.push_back(evt->getHead().getAQCD()); + nprts.push_back(evt->getHead().getNprt()); + procIDs.push_back(evt->getHead().getProcID()); + } + } + evtInfo::evtInfo( const std::vector>& lheFile, const std::vector& statVec ){ + int nEvt = lheFile.size(); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); + for( auto evt : lheFile ) + { + wgts.push_back(evt->getHead().getWeight()); + scales.push_back(evt->getHead().getScale()); + aQEDs.push_back(evt->getHead().getAQED()); + aQCDs.push_back(evt->getHead().getAQCD()); + size_t nPrt = 0; + for( auto stat : statVec ){ nPrt += evt->getProc()[stat].size(); } + relNPrts.push_back(nPrt); + procIDs.push_back(evt->getHead().getProcID()); + } + } + evtInfo::evtInfo( const std::vector>& lheFile, const std::vector& statVec, + sortFcn sorter ){ + int nEvt = lheFile.size(); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); + for( auto evt : lheFile ) + { + wgts.push_back(evt->getHead().getWeight()); + scales.push_back(evt->getHead().getScale()); + aQEDs.push_back(evt->getHead().getAQED()); + aQCDs.push_back(evt->getHead().getAQCD()); + size_t nPrt = 0; + for( auto stat : statVec ){ nPrt += evt->getProc(sorter)[stat].size(); } + relNPrts.push_back(nPrt); + procIDs.push_back(evt->getHead().getProcID()); + } + } + evtInfo::evtInfo( const std::vector>& lheFile, const std::vector& statVec, + statSort sorter ){ + int nEvt = lheFile.size(); + wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); + for( auto evt : lheFile ) + { + wgts.push_back(evt->getHead().getWeight()); + scales.push_back(evt->getHead().getScale()); + aQEDs.push_back(evt->getHead().getAQED()); + aQCDs.push_back(evt->getHead().getAQCD()); + 
size_t nPrt = 0; + for( auto stat : statVec ){ nPrt += evt->getProc(sorter)[stat].size(); } + relNPrts.push_back(nPrt); + procIDs.push_back(evt->getHead().getProcID()); + } + } + + // ZW: transposed particle information struct + prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt ){ + int nEvt = lheFile.size(); + moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); + spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); + for( auto evt : lheFile ) + { + for( auto prt : evt->getPrts() ) + { + moms.push_back( prt->getE() ); + masses.push_back( prt->getMass() ); + vtims.push_back( prt->getVTim() ); + spins.push_back( prt->getSpin() ); + statuses.push_back( prt->getStatus() ); + pdgs.push_back( prt->getPDG() ); + for( size_t k = 0 ; k < 2 ; ++k ) + { + moms.push_back( prt->getMom()[k] ); + mothers.push_back( prt->getMothers()[k] ); + icols.push_back( prt->getColor()[k] ); + } + moms.push_back( prt->getMom()[2] ); + } + } + } + prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ + int nEvt = lheFile.size(); + moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); + spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); + for( auto evt : lheFile ) + { + for( auto stat : statVec ) + { + for( auto i : evt->getProcOrder()[stat] ) + { + auto prt = evt->getPrts()[i]; + moms.push_back( prt->getE() ); + masses.push_back( prt->getMass() ); + vtims.push_back( prt->getVTim() ); + spins.push_back( prt->getSpin() ); + statuses.push_back( prt->getStatus() ); + pdgs.push_back( prt->getPDG() ); + for( size_t k = 0 ; k < 2 ; ++k ) + { + moms.push_back( prt->getMom()[k] ); + mothers.push_back( prt->getMothers()[k] ); + icols.push_back( prt->getColor()[k] ); + } + moms.push_back( prt->getMom()[2] ); + } + } + } + } + 
prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + sortFcn sorter ){ + int nEvt = lheFile.size(); + moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); + spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); + for( auto evt : lheFile ) + { + for( auto stat : statVec ) + { + for( auto i : evt->getProcOrder(sorter)[stat] ) + { + auto prt = evt->getPrts()[i]; + moms.push_back( prt->getE() ); + masses.push_back( prt->getMass() ); + vtims.push_back( prt->getVTim() ); + spins.push_back( prt->getSpin() ); + statuses.push_back( prt->getStatus() ); + pdgs.push_back( prt->getPDG() ); + for( size_t k = 0 ; k < 2 ; ++k ) + { + moms.push_back( prt->getMom()[k] ); + mothers.push_back( prt->getMothers()[k] ); + icols.push_back( prt->getColor()[k] ); + } + moms.push_back( prt->getMom()[2] ); + } + } + } + } + prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, + statSort sorter ){ + int nEvt = lheFile.size(); + moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); + spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); + for( auto evt : lheFile ) + { + for( auto stat : statVec ) + { + for( auto i : evt->getProcOrder(sorter)[stat] ) + { + auto prt = evt->getPrts()[i]; + moms.push_back( prt->getE() ); + masses.push_back( prt->getMass() ); + vtims.push_back( prt->getVTim() ); + spins.push_back( prt->getSpin() ); + statuses.push_back( prt->getStatus() ); + pdgs.push_back( prt->getPDG() ); + for( size_t k = 0 ; k < 2 ; ++k ) + { + moms.push_back( prt->getMom()[k] ); + mothers.push_back( prt->getMothers()[k] ); + icols.push_back( prt->getColor()[k] ); + } + moms.push_back( prt->getMom()[2] ); + } + } + } + } + + transSkel::transSkel(){ + this->procSets = std::vector>>(); + this->relProcs = 
std::vector>>(); + this->relEvSet = std::vector(); + } + transSkel::transSkel( transSkel& skeleton ){ + this->procSets = skeleton.procSets; + this->relProcs = skeleton.relProcs; + this->relEvSet = skeleton.relEvSet; + } + transSkel::transSkel( lheNode& lheFile, std::vector& evSet ){ + this->relProcs = evProcOrder( lheFile, evSet ); + this->relEvSet = std::vector(evSet.size(), false); + for ( size_t k = 0 ; k < this->relProcs.size() ; ++k ) + { + if( std::find(this->relProcs[k]->begin(), this->relProcs[k]->end(), true) != this->relProcs[k]->end() ) + { + this->relEvSet[k] = true; + } + } + this->procSets = std::vector>>(std::count(this->relEvSet.begin(), this->relEvSet.end(), true)); + auto evs = lheFile.getEvents(); + size_t j = 0; + for( size_t k = 0 ; k < this->relEvSet.size() ; ++k ) + { + if( this->relEvSet[k] ) + { + this->procSets[j] = std::vector>(); + for( size_t m = 0 ; m < relProcs[k]->size() ; ++m ) + { + if( relProcs[k]->at(m) ) + { + this->procSets[j].push_back(evs[m]); + } + } + ++j; + } + } + } + transSkel::transSkel( std::shared_ptr lheFile, std::vector& evSet ) : transSkel(*lheFile, evSet){}; + + // ZW: transposed LHE file with a single process type + transMonoLHE::transMonoLHE( const std::vector> lheFile , const int nPrt ){ + evtsHead = evtInfo(lheFile); + evtsData = prtInfo(lheFile, nPrt); + process = lheFile[0]; + } + transMonoLHE::transMonoLHE( const std::vector> lheFile, const int nPrt, const std::vector& statVec ){ + evtsHead = evtInfo(lheFile, statVec); + evtsData = prtInfo(lheFile, nPrt, statVec); + process = lheFile[0]; + } + transMonoLHE::transMonoLHE( const std::vector> lheFile, const int nPrt, + sortFcn sorter, + std::vector statVec ){ + evtsHead = evtInfo(lheFile, statVec); + evtsData = prtInfo(lheFile, nPrt, statVec, sorter); + process = lheFile[0]; + } + transMonoLHE::transMonoLHE( const std::vector> lheFile, const int nPrt, + statSort sorter, + std::vector statVec){ + evtsHead = evtInfo(lheFile, statVec); + evtsData = 
prtInfo(lheFile, nPrt, statVec, sorter); + process = lheFile[0]; + } + + // ZW: transposed LHE file ordered by subprocess + transLHE::transLHE(){ return; } + transLHE::transLHE( lheNode& lheFile ) + { + procSets = evProcessPull( lheFile ); + relProcs = evProcOrder( lheFile, procSets ); + xmlFile = lheFile.getFile(); + auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs ); + subProcs = std::vector>( procsOrdered.size() ); + for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) + { + subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt() ); + } + } + transLHE::transLHE( lheNode& lheFile, + sortFcn sorter, + const std::vector& statVec ) + { + procSets = evProcessPull( lheFile, sorter, statVec ); + relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + xmlFile = lheFile.getFile(); + auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, sorter, statVec ); + subProcs = std::vector>( procsOrdered.size() ); + for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) + { + subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), sorter, statVec ); + } + } + transLHE::transLHE( lheNode& lheFile, + statSort sorter, + const std::vector& statVec) + { + procSets = evProcessPull( lheFile, sorter, statVec ); + relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + xmlFile = lheFile.getFile(); + auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, sorter, statVec ); + subProcs = std::vector>( procsOrdered.size() ); + for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) + { + subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), sorter, statVec ); + } + } + transLHE::transLHE( lheNode& lheFile, const std::vector& statVec ) + { + procSets = evProcessPull( lheFile, statVec ); + relProcs = evProcOrder( lheFile, procSets, statVec ); + xmlFile = lheFile.getFile(); + auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, statVec ); + subProcs = 
std::vector>( procsOrdered.size() ); + for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) + { + subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), statVec ); + } + } + transLHE::transLHE( transSkel& skeleton ){ + relProcs = skeleton.relProcs; + subProcs = std::vector>( skeleton.procSets.size() ); + for( size_t k = 0 ; k < skeleton.procSets.size() ; ++k ) + { + subProcs[k] = std::make_shared( skeleton.procSets[k], skeleton.procSets[k].at(0)->getNprt() ); + } + } +// template + std::shared_ptr> transLHE::vectorFlat( std::vector>> vecVec ) + { + if( vecVec.size() != relProcs.size() ) throw std::range_error("vectorFlat: input vector size does not match number of subprocesses"); + for( size_t k = 0 ; k < vecVec.size() ; ++k){ + if( vecVec[k]->size() == relProcs[k]->size() ) continue; + else throw std::range_error("vectorFlat: input vector size does not match number of events for subprocess"); + } + auto flatVec = std::make_shared>(relProcs[0]->size()); + for( size_t k = 0 ; k < relProcs.size() ; ++k ){ + size_t currInd = 0; + for( size_t j = 0 ; j < relProcs[k]->size() ; ++j ){ + if( relProcs[k]->at(j) ){ + flatVec->at(currInd) = vecVec[k]->at(currInd); + ++currInd; + } + } + } + return flatVec; + } + + // ZW: vector transformation string_to_double + std::shared_ptr> vecStoD( const std::vector dataVec ) + { + auto valVec = std::make_shared>( dataVec.size() ); + std::transform( dataVec.begin(), dataVec.end(), valVec->begin(), []( const std::string_view& stv ){ + return std::stod(std::string(stv)); + } ); + return valVec; + } + + // ZW: vector transformation string_to_int + std::shared_ptr> vecStoI( const std::vector dataVec ) + { + auto valVec = std::make_shared>( dataVec.size() ); + std::transform( dataVec.begin(), dataVec.end(), valVec->begin(), []( const std::string_view& stv ){ + return std::stoi(std::string(stv)); + } ); + return valVec; + } + + // ZW: templated fcn for multiplying two vectors elementwise, + // assuming T has a 
multiplication operator* + template + std::shared_ptr> vecElemMult( const std::vector& vec1, const std::vector& vec2){ + if( vec1.size() < vec2.size() ){ return vecElemMult( vec2, vec1 ); } + auto valVec = std::make_shared>( vec1.size() ); + std::transform( vec1.begin(), vec1.end(), vec2.begin(), valVec->begin(), []( const T& v1, const T& v2 ){ + return v1 * v2; + } ); + return valVec; + } + + // ZW: bool struct to define which double values + // to extract transposed from LHE file + std::vector lheRetDs::getBools(){ + return { ebmup, xsecup, xerrup, xmaxup, xwgtup, scalup, aqedup, aqcdup, + pup, mass, vtimup, spinup }; + } + + // ZW: bool struct to define which int values + // to extract transposed from LHE file + std::vector lheRetInts::getBools(){ + return { idbmup, pdfgup, pdfsup, idwtup, nprup, lprup, + nup, idprup, idup, istup, mothup, icolup }; + } + + // ZW: function for extracting transposed double values + // from LHE file + std::shared_ptr>>> lheValDoubles( lheNode& lheFile, lheRetDs vals ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } + if( boolVec[1] ){ + std::vector xsecVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xsecVec.push_back(line->xsecup); + } + lheDs[currInd] = vecStoD( xsecVec ); + ++currInd; } + if( boolVec[2] ){ + std::vector xerrVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xerrVec.push_back(line->xerrup); + } + lheDs[currInd] = vecStoD( xerrVec ); + ++currInd; } + if( boolVec[3] ){ + 
std::vector xmaxVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xmaxVec.push_back(line->xmaxup); + } + lheDs[currInd] = vecStoD( xmaxVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. * M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + + return lheDos; + } + + std::shared_ptr>>> lheValDoubles(transLHE& lheAOS, lheRetDs vals ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + //auto lheAOS = transLHE( lheFile ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = 
vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. * M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + return lheDos; + } + + std::shared_ptr>>> lheValDoubles( lheNode& lheFile, + const std::vector& statVec, lheRetDs vals = lheRetDs() ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, statVec ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } + if( boolVec[1] ){ + std::vector xsecVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xsecVec.push_back(line->xsecup); + } + lheDs[currInd] = vecStoD( xsecVec ); + ++currInd; } + if( boolVec[2] ){ + std::vector xerrVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xerrVec.push_back(line->xerrup); + } + lheDs[currInd] = vecStoD( xerrVec ); + ++currInd; } + if( boolVec[3] ){ + std::vector xmaxVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + 
xmaxVec.push_back(line->xmaxup); + } + lheDs[currInd] = vecStoD( xmaxVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. * M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + + return lheDos; + } + + std::shared_ptr>>> lheValDoubles( lheNode& lheFile, + sortFcn sorter, + const std::vector& statVec = {"-1", "1"}, lheRetDs vals = lheRetDs() ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, sorter, statVec ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } + if( boolVec[1] ){ + std::vector xsecVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xsecVec.push_back(line->xsecup); + } + 
lheDs[currInd] = vecStoD( xsecVec ); + ++currInd; } + if( boolVec[2] ){ + std::vector xerrVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xerrVec.push_back(line->xerrup); + } + lheDs[currInd] = vecStoD( xerrVec ); + ++currInd; } + if( boolVec[3] ){ + std::vector xmaxVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xmaxVec.push_back(line->xmaxup); + } + lheDs[currInd] = vecStoD( xmaxVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. 
* M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + + return lheDos; + } + + std::shared_ptr>>> lheValDoubles( lheNode& lheFile, + statSort sorter, + const std::vector& statVec = {"-1", "1"}, lheRetDs vals = lheRetDs() ) + { + // ZW: hard-setting returning g_S instead of a_S for now + bool aStogS = true; + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, sorter, statVec ); + auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheDos; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } + if( boolVec[1] ){ + std::vector xsecVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xsecVec.push_back(line->xsecup); + } + lheDs[currInd] = vecStoD( xsecVec ); + ++currInd; } + if( boolVec[2] ){ + std::vector xerrVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xerrVec.push_back(line->xerrup); + } + lheDs[currInd] = vecStoD( xerrVec ); + ++currInd; } + if( boolVec[3] ){ + std::vector xmaxVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + xmaxVec.push_back(line->xmaxup); + } + lheDs[currInd] = vecStoD( xmaxVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } + if( 
boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } + if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); + if( aStogS ){ + std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), + []( double alphaS ){ + auto gS = std::sqrt( 4. * M_PI * alphaS ); + return gS; + } ); + } + ++currInd; + } + if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } + } + + return lheDos; + } + + // ZW: function for extracting transposed int values + // from LHE file + std::shared_ptr>>> lheValInts( lheNode& lheFile, lheRetInts vals = lheRetInts() ) + { + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile ); + auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheIs; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } + if( boolVec[5] ){ 
+ std::vector lprVec( lheFile.getInit()->getLines().size() ); + for( auto line : lheFile.getInit()->getLines() ) + { + lprVec.push_back(line->lprup); + } + lheDs[currInd] = vecStoI( lprVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } + if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } + } + return lheIs; + } + + std::shared_ptr>>> lheValInts( lheNode& lheFile, std::vector statVec, + lheRetInts vals = lheRetInts() ) + { + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, statVec ); + auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheIs; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } + if( boolVec[5] ){ + std::vector lprVec( lheFile.getInit()->getLines().size() ); + 
for( auto line : lheFile.getInit()->getLines() ) + { + lprVec.push_back(line->lprup); + } + lheDs[currInd] = vecStoI( lprVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } + if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } + } + return lheIs; + } + + std::shared_ptr>>> lheValInts( lheNode& lheFile, + sortFcn sorter, + std::vector statVec = {"-1", "1"}, lheRetInts vals = lheRetInts() ) + { + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, sorter, statVec ); + auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheIs; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } + if( boolVec[5] ){ + std::vector lprVec( lheFile.getInit()->getLines().size() ); + for( auto line : 
lheFile.getInit()->getLines() ) + { + lprVec.push_back(line->lprup); + } + lheDs[currInd] = vecStoI( lprVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } + if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } + } + return lheIs; + } + + std::shared_ptr>>> lheValInts( lheNode& lheFile, + statSort sorter, + std::vector statVec = {"-1", "1"}, lheRetInts vals = lheRetInts() ) + { + auto boolVec = vals.getBools(); + const int noVals = std::count(boolVec.begin(), boolVec.end(), true); + auto lheAOS = transLHE( lheFile, sorter, statVec ); + auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); + std::vector>> &lheDs = *lheIs; + int currInd = 0; + if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } + if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } + if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } + if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } + if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } + if( boolVec[5] ){ + std::vector lprVec( lheFile.getInit()->getLines().size() ); + for( auto line : 
lheFile.getInit()->getLines() ) + { + lprVec.push_back(line->lprup); + } + lheDs[currInd] = vecStoI( lprVec ); + ++currInd; } + for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) + { + if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } + if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } + if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } + if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } + if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } + if( boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } + } + return lheIs; + } +} + +#endif diff --git a/tools/REX/REX.h b/tools/REX/REX.h index 2dead7d333..94344bb193 100644 --- a/tools/REX/REX.h +++ b/tools/REX/REX.h @@ -42,10 +42,11 @@ // referred to as strings unless the difference is relevant namespace REX { - #pragma warning( push ) - #pragma warning( disable : 4101) - static const size_t npos = -1; - #pragma warning( pop ) + //#pragma warning( push ) + //#pragma warning( disable : 4101) + static const size_t npos = (size_t)-1; + #define UNUSED(x) (void)(x) + //#pragma warning( pop ) using sortFcn = std::function>(std::vector)>; using statSort = std::function>(std::string_view, std::vector)>; @@ -320,6 +321,9 @@ namespace REX event( xmlTree& originFile ); event( xmlTree* originFile ); event( std::shared_ptr originFile ); + event( const event& original ); + event( event* original ); + event( std::shared_ptr original ); bool prtsAreMod(); bool headIsMod(); bool isSpecSort() const; @@ -338,7 +342,7 @@ namespace REX std::map> procMap; std::map> procOrder; sortFcn eventSort = []( std::vector vec ){ return stoiSort( vec ); }; - statSort specSort = []( std::string_view stat, std::vector vec ){ return stoiSort( vec ); }; + statSort specSort = []( 
std::string_view stat, std::vector vec ){ UNUSED(stat); return stoiSort( vec ); }; bool specSorted = false; bool initProcMap(bool hard = false); bool initProcMap( sortFcn sorter, bool hard = false ); @@ -367,6 +371,29 @@ namespace REX std::map> &getProcOrder(statSort sorter); }; + using eventComparison = std::function&)>; + + using eventSetComp = std::function&)>; + + struct eventSet{ + eventSet(); + eventSet( const eventSet& nuEvents ); + eventSet( std::vector& nuEvents ); + eventSet( std::vector>& nuEvents ); + void setRelStats( std::vector& nuStats ); + void addEvent( event& nuEvent ); + void addEvent( std::shared_ptr nuEvent ); + void addEvent( std::vector& nuEvents ); + void addEvent( std::vector> nuEvents ); + void setComp( eventSetComp nuComp ); + bool belongs( event& nuEvent ); + bool belongs( std::shared_ptr nuEvent ); + protected: + std::vector events; + std::vector relStats = {"-1", "1"}; + eventSetComp comp; + }; + struct paramVal{ public: double value = 0; @@ -692,7 +719,7 @@ namespace REX std::shared_ptr init = std::make_shared(xmlFile, start); std::vector relStat = {"-1", "1"}; sortFcn particleSort = []( std::vector prts ){ return stoiSort(prts); }; - statSort statParticleSort = []( std::string_view dummy, std::vector prts ){ return stoiSort(prts); }; + statSort statParticleSort = []( std::string_view dummy, std::vector prts ){ UNUSED(dummy); return stoiSort(prts); }; virtual void headerWriter(); virtual void initWriter(); virtual void eventWriter(); @@ -737,17 +764,28 @@ namespace REX statSort sorter ); }; + struct transSkel { + public: + std::vector>> procSets; + std::vector>> relProcs; + std::vector relEvSet; + transSkel(); + transSkel( transSkel& skeleton ); + transSkel( lheNode& lheFile, std::vector& evSet ); + transSkel( std::shared_ptr lheFile, std::vector& evSet ); + }; + struct transMonoLHE { public: evtInfo evtsHead; prtInfo evtsData; std::shared_ptr process; - transMonoLHE( const std::vector>& lheFile = {}, const int nPrt = 8 ); - 
transMonoLHE( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ); - transMonoLHE( const std::vector>& lheFile, const int nPrt, + transMonoLHE( const std::vector> lheFile = {}, const int nPrt = 8 ); + transMonoLHE( const std::vector> lheFile, const int nPrt, const std::vector& statVec ); + transMonoLHE( const std::vector> lheFile, const int nPrt, sortFcn sorter, std::vector statVec = { "-1", "1" } ); - transMonoLHE( const std::vector>& lheFile, const int nPrt, + transMonoLHE( const std::vector> lheFile, const int nPrt, statSort sorter, std::vector statVec = { "-1", "1" } ); }; @@ -767,6 +805,7 @@ namespace REX statSort sorter, const std::vector& statVec = { "-1", "1" } ); transLHE( lheNode& lheFile, const std::vector& statVec ); + transLHE( transSkel& skeleton ); std::shared_ptr> vectorFlat( std::vector>> vecVec ); }; diff --git a/tools/REX/REX.hpp b/tools/REX/REX.hpp index 706c65cca4..0d62e1d8a7 100644 --- a/tools/REX/REX.hpp +++ b/tools/REX/REX.hpp @@ -489,7 +489,7 @@ namespace REX xmlNode::xmlNode(){ modded = false; return; } xmlNode::xmlNode( const std::string_view originFile, const size_t& begin, const std::vector>& childs ){ modded = false; - xmlFile = originFile; + xmlFile = originFile.substr( begin ); structure = xmlTree( originFile ); faux = structure.isFaux(); start = structure.getStart(); @@ -497,8 +497,8 @@ namespace REX size_t trueStart = xmlFile.find_first_not_of("< \n\r\f\t\v", start+1); name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ \n\r\f\t\v", trueStart) - trueStart ); content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - for( auto& child : *(structure.getChildren()) ){ - children.push_back( std::make_shared( *child ) ); + for( auto child : childs ){ + children.push_back( child ); } } xmlNode::xmlNode( xmlTree &tree ){ @@ -814,7 +814,7 @@ namespace REX if( isModded() || !isWritten() ){ headWriter( incId ); contWriter(); - childWriter( ); + childWriter( 
hasChildren ); endWriter(); writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); modded = false; @@ -1450,9 +1450,9 @@ namespace REX hasBeenProc = true; return true; } - bool event::inRwgtChild( std::string_view name ){ + bool event::inRwgtChild( std::string_view nameIn ){ for( auto child : childRwgt->getChildren() ){ - for( auto tag : child->getTags() ){ if(clStringComp(tag->getVal(), name)){ return true; } } + for( auto tag : child->getTags() ){ if(clStringComp(tag->getVal(), nameIn)){ return true; } } } return false; } @@ -1577,7 +1577,7 @@ namespace REX } event& makeEv( std::vector>& particles ){ - auto returnEvent = event( particles ); + static auto returnEvent = event( particles ); return returnEvent; } @@ -1906,6 +1906,7 @@ namespace REX initNode::initNode( const std::string_view originFile, const size_t& begin, bool parseOnline ) : xmlNode( originFile, begin ){ content = originFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + if( parseOnline ){ parse( parseOnline ); } } initNode::initNode( xmlNode& node, bool parseOnline ) : xmlNode( node ){ content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); diff --git a/tools/REX/rwgt_driver.cc b/tools/REX/rwgt_driver.cc index 7fa2ab2b5f..77b0efd928 100644 --- a/tools/REX/rwgt_driver.cc +++ b/tools/REX/rwgt_driver.cc @@ -15,6 +15,7 @@ #include "rwgt_instance.h" #include #include +#include %(include_lines)s int usage( char* argv0, int ret = 1 ) @@ -93,23 +94,40 @@ int main( int argc, char** argv ){ }} - // ZW : include rwgt_instances(s) + static REX::teaw::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); + + static std::vector runSet = {%(run_set)s}; - std::vector runSet = {%(run_set)s}; // std::vector runSet; - REX::teaw::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); - fileCol.initCards(); + static REX::transSkel loadEvs = fileCol.initCards( runSet ); + + fileCol.initDoubles(); - 
REX::teaw::ampCall subProcSet; +// static std::vector&, unsigned int )>> fBridgeConstr; + static std::vector fBridgeVec = {%(fbridge_vec)s}; - for( auto proc : runSet ){ - subProcSet.insert( REX::teaw::ampPair( proc.procEventInt, proc.bridgeCall ) ); + static std::vector bridges; + + static std::vector amps; + + for( size_t k = 0 ; k < runSet.size() ; ++k ){ + if( !loadEvs.relEvSet[k] ){ continue; } + fBridgeVec[k].init( loadEvs.procSets[k], 32 ); + bridges.push_back( fBridgeVec[k] ); + REX::teaw::amplitude currAmp = std::bind(&rwgt::fBridge::bridgeCall, &bridges.back(), std::placeholders::_1, std::placeholders::_2); + amps.push_back( currAmp ); } + // REX::teaw::ampCall subProcSet; + + // for( auto proc : runSet ){ + // subProcSet.insert( REX::teaw::ampPair( proc.procEventInt, proc.bridgeCall ) ); + // } + //auto bridgeCont = fbridgeRunner( fileCol.getLhe() ); //std::function>( std::vector&, std::vector& )> scatteringAmplitude = bridgeCont.scatAmp; - REX::teaw::rwgtRunner driver( fileCol, subProcSet ); + REX::teaw::rwgtRunner driver( fileCol, amps ); driver.runRwgt( outputPath ); diff --git a/tools/REX/rwgt_instance.cc b/tools/REX/rwgt_instance.cc index a927754625..a791cd45a4 100644 --- a/tools/REX/rwgt_instance.cc +++ b/tools/REX/rwgt_instance.cc @@ -17,7 +17,6 @@ namespace rwgt{ - //ZW: Function for calculating the number of remaining events in a warp // in order to pad the input arrays to a multiple of the warp size unsigned int warpRemain( unsigned int nEvt, unsigned int nWarp ){ @@ -26,52 +25,160 @@ namespace rwgt{ //ZW: Function for padding the input arrays to a multiple of the warp size template - std::shared_ptr> warpPad( std::vector& input, unsigned int nWarp = 32 ){ - auto nEvt = input->size(); + std::vector& warpPad( std::vector& input, unsigned int nWarp = 32 ){ + auto nEvt = input.size(); auto nWarpRemain = warpRemain( nEvt, nWarp ); auto fauxNEvt = nEvt + nWarpRemain; - auto output = std::make_shared>( fauxNEvt ); - std::copy( input.begin(), 
input.end(), output->begin()); + auto output = std::vector( fauxNEvt ); + std::copy( input.begin(), input.end(), output.begin()); return output; } - instance::instance(){} - instance::instance( std::vector>& event){ - this->procEventInt = event; - this->process = REX::event( event ); - } - instance::instance( std::vector>& event, REX::teaw::amplitude& amp ){ - this->procEventInt = event; - this->process = REX::event( event ); - bridgeCall = amp; - } - void instance::setProc( std::vector>& event ){ - this->procEventInt = event; - this->process = REX::event( event ); - } - instance::instance( std::vector>& event){ - this->procEventStr = event; - this->process = REX::event( event ); - } - instance::instance( std::vector>& event, REX::teaw::amplitude& amp ){ - this->procEventStr = event; - this->process = REX::event( event ); - bridgeCall = amp; - } - void instance::setProc( std::vector>& event ){ - this->procEventStr = event; - this->process = REX::event( event ); - } - void instance::setAmp( REX::teaw::amplitude& amp ){ - bridgeCall = amp; - } - std::shared_ptr> instance::ampEval( std::vector& momenta, std::vector& alphaS ){ - return bridgeCall( momenta, alphaS ); - } - std::shared_ptr> instance::ampEval( std::shared_ptr> momenta, - std::shared_ptr> alphaS ){ - return bridgeCall( *momenta, *alphaS ); - } + fBridge::fBridge(){} + fBridge::fBridge( REX::event& process ){ + this->nPar = process.getPrts().size(); + } + fBridge::fBridge( std::vector& process, unsigned int warpSize){ + this->nPar = process[0].getPrts().size(); + this->nEvt = process.size(); + this->nWarp = warpSize; + this->nWarpRemain = warpRemain( nEvt, nWarp ); + this->fauxNEvt = nEvt + nWarpRemain; + this->rndHel = std::vector( fauxNEvt, 0. ); + this->rndCol = std::vector( fauxNEvt, 0. ); + this->selHel = std::vector( fauxNEvt, 0. ); + this->selCol = std::vector( fauxNEvt, 0. 
); + } + fBridge::fBridge( std::vector> process, unsigned int warpSize){ + this->nPar = process[0]->getPrts().size(); + this->nEvt = process.size(); + this->nWarp = warpSize; + this->nWarpRemain = warpRemain( nEvt, nWarp ); + this->fauxNEvt = nEvt + nWarpRemain; + this->rndHel = std::vector( fauxNEvt, 0. ); + this->rndCol = std::vector( fauxNEvt, 0. ); + this->selHel = std::vector( fauxNEvt, 0. ); + this->selCol = std::vector( fauxNEvt, 0. ); + } + fBridge::fBridge( const fBridge& source ){ + this->rndHel = source.rndHel; + this->rndCol = source.rndCol; + this->selHel = source.selHel; + this->selCol = source.selCol; + this->chanId = source.chanId; + this->nMom = source.nMom; + this->nWarp = source.nWarp; + this->nWarpRemain = source.nWarpRemain; + this->nEvt = source.nEvt; + this->fauxNEvt = source.fauxNEvt; + this->nPar = source.nPar; + this->bridge = source.bridge; + } + void fBridge::init( std::vector& process, unsigned int warpSize ){ + this->nPar = process[0].getPrts().size(); + this->nEvt = process.size(); + this->nWarp = warpSize; + this->nWarpRemain = warpRemain( nEvt, nWarp ); + this->fauxNEvt = nEvt + nWarpRemain; + this->rndHel = std::vector( fauxNEvt, 0. ); + this->rndCol = std::vector( fauxNEvt, 0. ); + this->selHel = std::vector( fauxNEvt, 0. ); + this->selCol = std::vector( fauxNEvt, 0. ); + } + void fBridge::init( std::vector> process, unsigned int warpSize ){ + this->nPar = process[0]->getPrts().size(); + this->nEvt = process.size(); + this->nWarp = warpSize; + this->nWarpRemain = warpRemain( nEvt, nWarp ); + this->fauxNEvt = nEvt + nWarpRemain; + this->rndHel = std::vector( fauxNEvt, 0. ); + this->rndCol = std::vector( fauxNEvt, 0. ); + this->selHel = std::vector( fauxNEvt, 0. ); + this->selCol = std::vector( fauxNEvt, 0. 
); + } + void fBridge::bridgeSetup( unsigned int& noEvts, unsigned int warpSize ){ + this->nEvt = noEvts; + this->nWarp = warpSize; + this->nWarpRemain = warpRemain( nEvt, nWarp ); + this->fauxNEvt = nEvt + nWarpRemain; + this->rndHel = std::vector( fauxNEvt, 0. ); + this->rndCol = std::vector( fauxNEvt, 0. ); + this->selHel = std::vector( fauxNEvt, 0. ); + this->selCol = std::vector( fauxNEvt, 0. ); + } + void fBridge::bridgeSetup( std::vector& evVec, unsigned int warpSize ){ + this->nEvt = evVec.size(); + this->nWarp = warpSize; + this->nWarpRemain = warpRemain( nEvt, nWarp ); + this->fauxNEvt = nEvt + nWarpRemain; + this->rndHel = std::vector( fauxNEvt, 0. ); + this->rndCol = std::vector( fauxNEvt, 0. ); + this->selHel = std::vector( fauxNEvt, 0. ); + this->selCol = std::vector( fauxNEvt, 0. ); + } + void fBridge::bridgeSetup( std::shared_ptr>& evVec, unsigned int warpSize ){ + this->bridgeSetup( *evVec, warpSize ); + } + void fBridge::setBridge( bridgeWrapper& amp ){ + if( this->bridge == nullptr){ + this->bridge = amp; + } else throw std::runtime_error("fBridge object doubly defined."); + } + std::shared_ptr> fBridge::bridgeCall( std::vector& momenta, std::vector& alphaS ){ + if(this->nEvt == 0) this->bridgeSetup( alphaS ); + // for( auto j = 0 ; j < nWarpRemain ; ++j ){ + // alphaS.push_back( 0. ); + // for( auto k = 0 ; k < nMom * nPar ; ++k ){ + // momenta.push_back( 0. 
); + // } + // } + if( this->bridge == nullptr) throw std::runtime_error("fBridge object not defined."); + alphaS = warpPad( alphaS, nWarp ); + momenta = warpPad( momenta, nWarp * nPar * nMom ); + auto evalScatAmps = this->bridge(fauxNEvt, nPar, nMom, momenta, alphaS, rndHel, rndCol, selHel, selCol, chanId ); + alphaS.resize( nEvt ); + momenta.resize( nEvt * nPar * nMom ); + evalScatAmps->resize( nEvt ); + return evalScatAmps; + } + + instance::instance(){} + instance::instance( std::vector>& event){ + this->procEventInt = event; + this->process = REX::event( event ); + } + instance::instance( std::vector>& event, REX::teaw::amplitude& amp ){ + this->procEventInt = event; + this->process = REX::event( event ); + bridgeCall = amp; + } + void instance::setProc( std::vector>& event ){ + this->procEventInt = event; + this->process = REX::event( event ); + } + instance::instance( std::vector>& event){ + this->procEventStr = event; + this->process = REX::event( event ); + } + instance::instance( std::vector>& event, REX::teaw::amplitude& amp ){ + this->procEventStr = event; + this->process = REX::event( event ); + bridgeCall = amp; + } + void instance::setProc( std::vector>& event ){ + this->procEventStr = event; + this->process = REX::event( event ); + } + void instance::setAmp( REX::teaw::amplitude& amp ){ + bridgeCall = amp; + } + std::shared_ptr> instance::ampEval( std::vector& momenta, std::vector& alphaS ){ + return bridgeCall( momenta, alphaS ); + } + std::shared_ptr> instance::ampEval( std::shared_ptr> momenta, + std::shared_ptr> alphaS ){ + return bridgeCall( *momenta, *alphaS ); + } } diff --git a/tools/REX/rwgt_instance.h b/tools/REX/rwgt_instance.h index 376635933e..825c24750d 100644 --- a/tools/REX/rwgt_instance.h +++ b/tools/REX/rwgt_instance.h @@ -15,14 +15,50 @@ #include "teawREX.h" -namespace rwgt{ +/** + * The floating point precision used in Fortran arrays. + * This is presently hardcoded to double precision (REAL*8). 
+ */ +using FORTRANFPTYPE = double; // for Fortran double precision (REAL*8) arrays +//using FORTRANFPTYPE = float; // for Fortran single precision (REAL*4) arrays + - using FORTRANFPTYPE = double; +namespace rwgt{ //ZW: Function for calculating the number of remaining events in a warp // in order to pad the input arrays to a multiple of the warp size unsigned int warpRemain( unsigned int nEvt, unsigned int nWarp = 32 ); + // ZW: bridgeWrapper needs args: nEvs, nPar, nMom, moms, gs, rndhel, rndcol, selhel, selcol, chanId + using bridgeWrapper = std::function>( int&, int&, int&, std::vector&, std::vector&, std::vector&, std::vector&, std::vector&,std::vector&, unsigned int& )>; + + struct fBridge{ + std::vector rndHel; + std::vector rndCol; + std::vector selHel; + std::vector selCol; + unsigned int chanId = 0; + int nMom = 4; + int nWarp; + int nWarpRemain; + int nEvt; + int fauxNEvt; + int nPar; + bridgeWrapper bridge; + fBridge(); + fBridge( REX::event& process ); + fBridge( std::vector& process, unsigned int warpSize = 32 ); + fBridge( std::vector> process, unsigned int warpSize = 32 ); + fBridge( const fBridge& source ); + void init( std::vector& process, unsigned int warpSize = 32 ); + void init( std::vector> process, unsigned int warpSize = 32 ); + void bridgeSetup( unsigned int& noEvts, unsigned int warpSize = 32); + void bridgeSetup( std::vector& evVec, unsigned int warpSize = 32); + void bridgeSetup( std::shared_ptr>& evVec, unsigned int warpSize = 32); + void setBridge( bridgeWrapper& amp ); + std::shared_ptr> bridgeCall( std::vector& momenta, std::vector& alphaS ); + }; + struct instance{ std::vector> procEventInt; std::vector> procEventStr; diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index 51be5e7ec7..59c3801103 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -19,114 +19,76 @@ namespace %(process_namespace)s{ //namespace dummy{ - struct fbridgeRunner{ - std::vector rndHel; - std::vector rndCol; - std::vector 
selHel; - std::vector selCol; - CppObjectInFortran *fBridge; - const unsigned int chanId = 0; - const int nMom = 4; - int nWarpRemain; - int nEvt; - int fauxNEvt; - int nPar; - bool setup = false; - fbridgeRunner(){} - fbridgeRunner( REX::event& process ){ - nPar = process.getPrts().size(); - } - void runnerSetup( unsigned int& noEvts, unsigned int warpSize = 32){ - if( setup ){ return; } - nEvt = noEvts; - nWarpRemain = rwgt::warpRemain( nEvt, warpSize ); - fauxNEvt = nEvt + nWarpRemain; - rndHel = std::vector( fauxNEvt, 0. ); - rndCol = std::vector( fauxNEvt, 0. ); - selHel = std::vector( fauxNEvt, 0 ); - selCol = std::vector( fauxNEvt, 0 ); - setup = true; - } - void runnerSetup( std::vector& evVec, unsigned int warpSize = 32){ - if( setup ){ return; } - nEvt = evVec.size(); - nWarpRemain = rwgt::warpRemain( nEvt, warpSize ); - fauxNEvt = nEvt + nWarpRemain; - rndHel = std::vector( fauxNEvt, 0. ); - rndCol = std::vector( fauxNEvt, 0. ); - selHel = std::vector( fauxNEvt, 0 ); - selCol = std::vector( fauxNEvt, 0 ); - setup = true; - } - void runnerSetup( std::shared_ptr> evVec, unsigned int warpSize = 32){ - if( setup ){ return; } - runnerSetup( *evVec, warpSize ); - } - std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ - runnerSetup( alphaS ); - for( size_t j = 0 ; j < nWarpRemain ; ++j ){ - alphaS.push_back( 0. ); - for( size_t k = 0 ; k < nMom * nPar ; ++k ){ - momenta.push_back( 0. 
); - } - } - auto evalScatAmps = std::make_shared>( fauxNEvt ); - fbridgecreate_( &fBridge, &fauxNEvt, &nPar, &nMom ); - fbridgesequence_( &fBridge, &momenta.at(0), &alphaS.at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); - fbridgedelete_( &fBridge ); - alphaS.resize( nEvt ); - momenta.resize( nEvt * nPar * nMom ); - evalScatAmps->resize( nEvt ); - return evalScatAmps; - } - std::shared_ptr> scatAmp( std::shared_ptr> momenta, std::shared_ptr> alphaS ){ - return scatAmp( *momenta, *alphaS ); - } -#if defined MGONGPU_FPTYPE_FLOAT - std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ - auto nuMom = std::vector( nEvt ); - auto nuAlphaS = std::vector( nEvt ); - std::transform( momenta.begin(), momenta.end(), nuMom.begin(), [](double mom){ return static_cast(mom); }) - std::transform( alphaS.begin(), alphaS.end(), nuAlphaS.begin(), [](double gs){ return static_cast(gs); }); - return scatAmp( nuMom, nuAlphaS ); - } -#endif - }; + std::shared_ptr> amp( int& nEvt, int& nPar, int& nMom, std::vector& momenta, std::vector& alphaS, std::vector& rndHel, std::vector& rndCol, std::vector& selHel, std::vector& selCol, unsigned int& chanId ){ + CppObjectInFortran *bridgeInst; + auto evalScatAmps = std::make_shared>( nEvt ); + fbridgecreate_( &bridgeInst, &nEvt, &nPar, &nMom ); + fbridgesequence_( &bridgeInst, &momenta.at(0), &alphaS.at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgedelete_( &bridgeInst ); + return evalScatAmps; + } + + rwgt::fBridge bridgeConstr( std::vector& process, unsigned int warpSize = 32 ){ + rwgt::fBridge constrBridge = rwgt::fBridge( process, warpSize ); + rwgt::bridgeWrapper amplitude = amp; + constrBridge.setBridge( amplitude ); + return constrBridge; + } + + rwgt::fBridge bridgeConstr(){ + rwgt::fBridge constrBridge = rwgt::fBridge(); + rwgt::bridgeWrapper amplitude = amp; + constrBridge.setBridge( amplitude ); + return constrBridge; + } - 
std::shared_ptr> thisProcSort( std::string_view& status, std::vector& arguments ){ - std::vector initPrts = %(init_prt_ids)s; - std::vector finPrts = %(fin_prt_ids)s; + std::shared_ptr> procSort( std::string_view status, std::vector arguments ){ + std::vector> initPrts = {%(init_prt_ids)s}; + std::vector> finPrts = {%(fin_prt_ids)s}; // std::vector initPrts = {"-1"}; // std::vector finPrts = {"1"}; + std::shared_ptr> refOrder; if( status == "-1" ){ - return REX::getRefOrder( initPrts, arguments ); + for( auto& prts : initPrts ){ + refOrder = REX::getRefOrder( prts, arguments ); + if( refOrder->at(refOrder->size() - 1) != REX::npos ){ break; } + } + return refOrder; } else if( status == "1" ){ - return REX::getRefOrder( finPrts, arguments ); + for( auto& prts : finPrts ){ + refOrder = REX::getRefOrder( prts, arguments ); + if( refOrder->at(refOrder->size() - 1) != REX::npos ){ break; } + } + return refOrder; } return REX::stoiSort( arguments ); } -// ZW: SET UP INPUT LHE BLOCK -// ZW: SET UP REX::event FROM LHE BLOCK -// auto procEvent = REX::event( procEvent ); -// REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; - - std::vector> eventVec = {%(process_event)s}; - REX::event locEv = REX::event( eventVec ); - fbridgeRunner fBridge = fbridgeRunner( locEv ); - - REX::teaw::amplitude scatteringAmp = []( std::vector& momenta, std::vector& alphaS ){ - return fBridge.scatAmp( momenta, alphaS ); - }; - - REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; - - auto runner = rwgt::instance(eventVec, scatteringAmp); - auto thisProc = runner.process.getProc( currProcSort ); -// ZW: SET UP WRAPPER FOR FORTRAN_BRIDGE + bool checkProc( REX::event& process, std::vector& relStats ){ + REX::statSort locSort = procSort; + auto order = process.getProcOrder( locSort ); + for( auto stat : relStats ){ + auto currPts = order.at( stat ); + if( currPts[currPts.size() - 1 
] == REX::npos ){ return false; } + } + return true; + } -// ZW: SET UP EVALUATION OF MATRIX ELEMENTS FUNCTION + REX::eventSet eventSetConstr( std::vector& process ){ + REX::eventSet constrSet = REX::eventSet( process ); + REX::eventSetComp compar = checkProc; + constrSet.setComp( compar ); + return constrSet; + } + REX::eventSet getEventSet(){ + std::vector>> eventVec = {%(process_events)s}; + std::vector process; + for( auto ev : eventVec ){ + process.push_back( REX::event( ev ) ); + } + return eventSetConstr( process ); + } } \ No newline at end of file diff --git a/tools/REX/rwgt_runner.h b/tools/REX/rwgt_runner.h new file mode 100644 index 0000000000..4c493e504e --- /dev/null +++ b/tools/REX/rwgt_runner.h @@ -0,0 +1,34 @@ +//========================================================================== +// Copyright (C) 2023-2024 CERN +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Written by: Z. Wettersten (June 2024) for the MG5aMC CUDACPP plugin. +//========================================================================== +//========================================================================== +// This file has been automatically generated for the CUDACPP plugin by +%(info_lines)s +//========================================================================== +//========================================================================== +// A class for reweighting matrix elements for +%(process_lines)s +//-------------------------------------------------------------------------- + +#ifndef _%(process_namespace)s_RUNNER_H_ +#define _%(process_namespace)s_RUNNER_H_ + +#include "rwgt_instance.h" + +namespace %(process_namespace)s { + + std::shared_ptr> amp( int& nEvt, int& nPar, int& nMom, std::vector& momenta, std::vector& alphaS, std::vector& rndHel, std::vector& rndCol, std::vector& selHel, std::vector& selCol, int& chanId ); + rwgt::fBridge bridgeConstr( std::vector& process, unsigned int warpSize ); + rwgt::fBridge 
bridgeConstr(); + std::shared_ptr> procSort( std::string_view status, std::vector arguments ); + bool checkProc( REX::event& process, std::vector& relStats ); + REX::eventSet eventSetConstr( std::vector& process ); + REX::eventSet getEventSet(); + +} + + + +#endif \ No newline at end of file diff --git a/tools/REX/rwgt_runner_copy.cc b/tools/REX/rwgt_runner_copy.cc new file mode 100644 index 0000000000..5e89f8c3d3 --- /dev/null +++ b/tools/REX/rwgt_runner_copy.cc @@ -0,0 +1,197 @@ +//========================================================================== +// Copyright (C) 2023-2024 CERN +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Written by: Z. Wettersten (Jan 2024) for the MG5aMC CUDACPP plugin. +//========================================================================== +//========================================================================== +// This file has been automatically generated for the CUDACPP plugin by +%(info_lines)s +//========================================================================== +//========================================================================== +// A class for reweighting matrix elements for +%(process_lines)s +//-------------------------------------------------------------------------- + +#include "rwgt_instance.h" +#include "fbridge.cc" + +// ZW: SET UP NAMESPACE +namespace %(process_namespace)s{ +//namespace dummy{ + + std::shared_ptr> amp( int& nEvt, int& nPar, int& nMom, std::vector& momenta, std::vector& alphaS, std::vector& rndHel, std::vector& rndCol, std::vector& selHel, std::vector& selCol, int& chanId ){ + CppObjectInFortran *bridgeInst; + auto evalScatAmps = std::make_shared>( nEvt ); + fbridgecreate_( &bridgeInst, &nEvt, &nPar, &nMom ); + fbridgesequence_( &bridgeInst, &momenta.at(0), &alphaS.at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgedelete_( &bridgeInst ); + return evalScatAmps; + } + + rwgt::fBridge
bridgeConstr( std::vector& process, unsigned int warpSize = 32 ){ + rwgt::fBridge constrBridge = rwgt::fBridge( process, warpSize ); + rwgt::bridgeWrapper amplitude = amp; + constrBridge.setBridge( amplitude ); + return constrBridge; + } + + std::shared_ptr> procSort( std::string_view status, std::vector arguments ){ + std::vector> initPrts = {%(init_prt_ids)s}; + std::vector> finPrts = {%(fin_prt_ids)s}; +// std::vector initPrts = {"-1"}; +// std::vector finPrts = {"1"}; + std::shared_ptr> refOrder; + if( status == "-1" ){ + for( auto& prts : initPrts ){ + refOrder = REX::getRefOrder( prts, arguments ); + if( refOrder->at(refOrder->size() - 1) != REX::npos ){ break; } + } + return refOrder; + } + else if( status == "1" ){ + for( auto& prts : finPrts ){ + refOrder = REX::getRefOrder( prts, arguments ); + if( refOrder->at(refOrder->size() - 1) != REX::npos ){ break; } + } + return refOrder; + } + return REX::stoiSort( arguments ); + } + + bool checkProc( REX::event& process, std::vector& relStats ){ + REX::statSort locSort = procSort; + auto order = process.getProcOrder( locSort ); + for( auto stat : relStats ){ + auto currPts = order.at( stat ); + if( currPts[currPts.size() - 1 ] == REX::npos ){ return false; } + } + return true; + } + + REX::eventSet eventSetConstr( std::vector& process ){ + REX::eventSet constrSet = REX::eventSet( process ); + REX::eventSetComp compar = checkProc; + constrSet.setComp( compar ); + return constrSet; + } + + REX::eventSet getEventSet(){ + std::vector>> eventVec = {%(process_events)s}; + std::vector process; + for( auto& ev : eventVec ){ + process.push_back( REX::event( ev ) ); + } + return eventSetConstr( process ); + } + + struct fbridgeRunner{ + std::vector rndHel; + std::vector rndCol; + std::vector selHel; + std::vector selCol; + CppObjectInFortran *fBridge; + const unsigned int chanId = 0; + const int nMom = 4; + int nWarpRemain; + int nEvt; + int fauxNEvt; + int nPar; + bool setup = false; + fbridgeRunner(){} + 
fbridgeRunner( REX::event& process ){ + nPar = process.getPrts().size(); + } + void runnerSetup( unsigned int& noEvts, unsigned int warpSize = 32){ + if( setup ){ return; } + nEvt = noEvts; + nWarpRemain = rwgt::warpRemain( nEvt, warpSize ); + fauxNEvt = nEvt + nWarpRemain; + rndHel = std::vector( fauxNEvt, 0. ); + rndCol = std::vector( fauxNEvt, 0. ); + selHel = std::vector( fauxNEvt, 0 ); + selCol = std::vector( fauxNEvt, 0 ); + setup = true; + } + void runnerSetup( std::vector& evVec, unsigned int warpSize = 32){ + if( setup ){ return; } + nEvt = evVec.size(); + nWarpRemain = rwgt::warpRemain( nEvt, warpSize ); + fauxNEvt = nEvt + nWarpRemain; + rndHel = std::vector( fauxNEvt, 0. ); + rndCol = std::vector( fauxNEvt, 0. ); + selHel = std::vector( fauxNEvt, 0 ); + selCol = std::vector( fauxNEvt, 0 ); + setup = true; + } + void runnerSetup( std::shared_ptr> evVec, unsigned int warpSize = 32){ + if( setup ){ return; } + runnerSetup( *evVec, warpSize ); + } + std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ + runnerSetup( alphaS ); + for( auto j = 0 ; j < nWarpRemain ; ++j ){ + alphaS.push_back( 0. ); + for( auto k = 0 ; k < nMom * nPar ; ++k ){ + momenta.push_back( 0. 
); + } + } + auto evalScatAmps = std::make_shared>( fauxNEvt ); + fbridgecreate_( &fBridge, &fauxNEvt, &nPar, &nMom ); + fbridgesequence_( &fBridge, &momenta.at(0), &alphaS.at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgedelete_( &fBridge ); + alphaS.resize( nEvt ); + momenta.resize( nEvt * nPar * nMom ); + evalScatAmps->resize( nEvt ); + return evalScatAmps; + } + std::shared_ptr> scatAmp( std::shared_ptr> momenta, std::shared_ptr> alphaS ){ + return scatAmp( *momenta, *alphaS ); + } +#if defined MGONGPU_FPTYPE_FLOAT + std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ + auto nuMom = std::vector( nEvt ); + auto nuAlphaS = std::vector( nEvt ); + std::transform( momenta.begin(), momenta.end(), nuMom.begin(), [](double mom){ return static_cast(mom); }); + std::transform( alphaS.begin(), alphaS.end(), nuAlphaS.begin(), [](double gs){ return static_cast(gs); }); + return scatAmp( nuMom, nuAlphaS ); + } +#endif + }; + + std::shared_ptr> thisProcSort( std::string_view& status, std::vector& arguments ){ + std::vector initPrts = %(init_prt_ids)s; + std::vector finPrts = %(fin_prt_ids)s; +// std::vector initPrts = {"-1"}; +// std::vector finPrts = {"1"}; + if( status == "-1" ){ + return REX::getRefOrder( initPrts, arguments ); + } + else if( status == "1" ){ + return REX::getRefOrder( finPrts, arguments ); + } + return REX::stoiSort( arguments ); + } + +// ZW: SET UP INPUT LHE BLOCK +// ZW: SET UP REX::event FROM LHE BLOCK +// auto procEvent = REX::event( procEvent ); +// REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; + + std::vector> eventVec = {%(process_event)s}; + REX::event locEv = REX::event( eventVec ); + fbridgeRunner fBridge = fbridgeRunner( locEv ); + + REX::teaw::amplitude scatteringAmp = []( std::vector& momenta, std::vector& alphaS ){ + return fBridge.scatAmp( momenta, alphaS ); + }; + + REX::statSort currProcSort = []( 
std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; + + auto runner = rwgt::instance(eventVec, scatteringAmp); + auto thisProc = runner.process.getProc( currProcSort ); +// ZW: SET UP WRAPPER FOR FORTRAN_BRIDGE + +// ZW: SET UP EVALUATION OF MATRIX ELEMENTS FUNCTION + + +} \ No newline at end of file diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc deleted file mode 120000 index f9640c2fcb..0000000000 --- a/tools/REX/teawREX.cc +++ /dev/null @@ -1 +0,0 @@ -teawREX.hpp \ No newline at end of file diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc new file mode 100644 index 0000000000..6a3d7e87a0 --- /dev/null +++ b/tools/REX/teawREX.cc @@ -0,0 +1,611 @@ +/*** + * _ ______ _______ __ + * | | | ___ \ ___\ \ / / + * | |_ ___ __ ___ _| |_/ / |__ \ V / + * | __/ _ \/ _` \ \ /\ / / /| __| / \ + * | || __/ (_| |\ V V /| |\ \| |___/ /^\ \ + * \__\___|\__,_| \_/\_/ \_| \_\____/\/ \/ + * + ***/ + +// THIS IS NOT A LICENSED RELEASE +// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD +// FROM AN IMPROPER RELEASE. + +// Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. +// All rights reserved. 
+ +#ifndef _TEAWREX_CC_ +#define _TEAWREX_CC_ + +#include +#include +#include +#include +#include +#include +#include +#include "REX.cc" +#include "teawREX.h" + +namespace REX::teaw +{ + + template + std::shared_ptr> scatAmpEval(std::vector& momenta, std::function>(std::vector&)> evalFunc) + { return evalFunc(momenta); } + + template + std::shared_ptr> scatAmpEval(std::vector& momenta, std::function(std::vector&)> evalFunc) + { return evalFunc(momenta); } + + template + std::shared_ptr> scatAmpEval(std::vector& momenta, std::function>(std::vector&, std::vector&)> evalFunc) + { return evalFunc(momenta); } + + template + std::shared_ptr> scatAmpEval(std::vector& momenta, std::function(std::vector&, std::vector&)> evalFunc) + { return evalFunc(momenta); } + + rwgtVal::rwgtVal() : paramVal(){ return; } + rwgtVal::rwgtVal( std::string_view paramLine ) + : paramVal( paramLine, false ){if( paramLine.size() == 0 ){ return; } + realLine = paramLine; + auto vals = *REX::nuBlankSplitter( realLine ); + blockName = vals[1]; + idStr = vals[2]; + valStr = vals[3]; + } + std::string_view rwgtVal::getLine(){ return realLine; } + bool rwgtVal::isAll(){ return (idStr == "all"); } + void rwgtVal::outWrite( REX::paramBlock& srcBlock ){ + if ( isAll() ) + { + for( auto param : srcBlock.params ) + { + param.valStr = valStr; + param.modded = true; + } + return; + } + auto currPar = std::find_if( srcBlock.params.begin(), srcBlock.params.end(), + [&]( const REX::paramVal& parPar ){ return (parPar.idStr == idStr ); } ); + if( currPar == srcBlock.params.end() ){ + srcBlock.params.push_back( REX::paramVal( realLine.substr(realLine.find("set") + 4) ) ); + srcBlock.params[ srcBlock.params.size() - 1 ].modded = true; + srcBlock.modded = true; + return; + } + currPar->valStr = valStr; + currPar->modded = true; + srcBlock.modded = true; + return; + } + + rwgtBlock::rwgtBlock( std::vector values, std::string_view title) + { + name = title; + rwgtVals.resize( values.size() ); + for( size_t k = 0 ; k 
< values.size() ; ++k ) + { + rwgtVals[k] = rwgtVal( values[k] ); + } + } + rwgtBlock::rwgtBlock( const std::vector& vals, std::string_view title ) + { + name = title; + rwgtVals = vals; + } + std::string_view rwgtBlock::getBlock(){ + if( written ){ return runBlock; } + runBlock = ""; + for( auto val : rwgtVals ){ + runBlock += std::string(val.getLine()) + "\n"; + } + written = true; + return runBlock; + } + void rwgtBlock::outWrite( REX::paramBlock& srcBlock, const std::map& blocks ) + { + for( auto parm : rwgtVals ) + { + parm.outWrite( srcBlock ); + } + srcBlock.modded = true; + return; + } + + void rwgtProc::parse(){ + std::vector blocks; + std::vector>> params; + auto procLines = *REX::nuLineSplitter( procString ); + for( auto line : procLines ) + { + auto strtPt = line.find("set"); + auto words = *REX::nuWordSplitter( line.substr(strtPt) ); + auto currBlock = words[1]; + auto loc = std::find_if( blocks.begin(), blocks.end(), + [&]( std::string_view block ){ return (block == currBlock); } ); + if( loc == blocks.end() ){ + blocks.push_back( currBlock ); + params.push_back( std::make_shared>( std::vector({rwgtVal( line )} ) )); } + else { + params[ std::distance( blocks.begin(), loc ) - 1 ]->push_back( rwgtVal( line ) ); + } + } + rwgtParams.reserve(blocks.size()); + for( size_t k = 0 ; k < blocks.size() ; ++k ) + { + rwgtParams.push_back( rwgtBlock( *params[k], blocks[k] ) ); + } + } + rwgtProc::rwgtProc( REX::lesHouchesCard slhaSet, std::string_view rwgtSet, bool parseOnline ) + { + if( rwgtSet == "" ){ return; } + auto strtLi = rwgtSet.find( "\n", rwgtSet.find("launch") ) + 1; + auto endLi = rwgtSet.find("\n", strtLi); + while( rwgtSet[rwgtSet.find_first_not_of("\n ", endLi)] == 's' ) + { endLi = rwgtSet.find( "\n", endLi + 1 ); } + procString = rwgtSet.substr( strtLi, endLi - strtLi ); + if( parseOnline ){ parse(); } + } + std::shared_ptr rwgtProc::outWrite( const REX::lesHouchesCard& paramOrig ){ + auto slhaOrig = std::make_shared( paramOrig ); + std::map 
blockIds; + for( size_t k = 0 ; k < slhaOrig->blocks.size() ; ++k ) + { slhaOrig->blocks[k].parse( true ); + auto nyama = std::pair( slhaOrig->blocks[k].name, k); + blockIds.insert( nyama ); } + for( auto rwgts : rwgtParams ) + { rwgts.outWrite( slhaOrig->blocks[ blockIds.at( rwgts.name ) ], blockIds ); } + slhaOrig->modded = true; + return slhaOrig; + } + std::string_view rwgtProc::comRunProc(){ return procString; } + + void rwgtCard::parse( bool parseOnline ) { + auto strt = srcCard.find("launch"); + auto commPos = srcCard.find_last_of("#", strt); + while( commPos > srcCard.find_last_of("\n", strt) ){ + if( commPos == REX::npos ){ + break; + } + strt = srcCard.find("launch", strt + 6 ); + } + while( auto chPos = srcCard.find( "set" ) < strt ){ + if( srcCard.find_last_of("#", chPos) > srcCard.find_last_of("\n", chPos) ){ chPos = srcCard.find("change", strt + 6 ); continue; } + opts.push_back( srcCard.substr( chPos, srcCard.find("\n", chPos) - chPos ) ); + } + std::vector lnchPos({strt}); + auto nuLnch = srcCard.find( "launch", strt + 6 ); + while ( nuLnch != std::string_view::npos ) + { + if( srcCard.find_last_of("#", nuLnch) < srcCard.find_last_of("\n", nuLnch) ){ lnchPos.push_back(nuLnch); } + nuLnch = srcCard.find( "launch", nuLnch + 6 ); + } + for( size_t k = 0 ; k < lnchPos.size() - 1 ; ++k ) + { + auto strtLi = srcCard.find( "set", lnchPos[k] ); + rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( strtLi, lnchPos[k+1] - strtLi ), parseOnline ) ); + if( srcCard.find( "--", lnchPos[k] ) < strtLi ){ + auto strtPos = srcCard.find( "--", lnchPos[k] ); + while( (strtPos < strtLi ) && (strtPos!= std::string_view::npos) ){ + auto nuStrtPos = std::min( srcCard.find( "\n", strtPos ), srcCard.find( "--", strtPos + 1 )); + rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.push_back( srcCard.substr( strtPos, nuStrtPos - strtPos ) ); + if( rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr(2,11) == "rwgt_name"){ + rwgtRuns[ 
rwgtRuns.size() - 1 ].rwgtName = rwgtRuns[ rwgtRuns.size() - 1 ]. + rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr( 11, nuStrtPos - strtPos - 11 ); + } + if( nuStrtPos == srcCard.find( "\n", strtPos ) ){ break; } + strtPos = nuStrtPos; + } + } + } + size_t endLi = srcCard.find( "\n", lnchPos[ lnchPos.size() - 1 ] ); + if( srcCard.substr( endLi + 1, 3 ) == "set" ){ + while( srcCard.substr( endLi + 1, 3 ) == "set" ) + { + endLi = srcCard.find( "\n", endLi + 1 ); + } + rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( lnchPos[lnchPos.size()-1], endLi - lnchPos[lnchPos.size()-1] ), parseOnline ) ); + } + rwgtProcs = std::vector(); rwgtProcs.reserve( rwgtRuns.size() ); + rwgtNames.reserve( rwgtRuns.size() ); + int p = 1; + for( auto run : rwgtRuns ){ + rwgtProcs.push_back( run.comRunProc() ); + if( run.rwgtName == "" ){ + rwgtNames.push_back( "rwgt_" + std::to_string( p++ ) ); + } else { + rwgtNames.push_back( std::string(run.rwgtName) ); + } + } + } + rwgtCard::rwgtCard( std::string_view reweight_card ){ + srcCard = reweight_card; + } + rwgtCard::rwgtCard( std::string_view reweight_card, REX::lesHouchesCard slhaParams, bool parseOnline ){ + srcCard = reweight_card; + slhaCard = slhaParams; + if( parseOnline ){ parse( parseOnline ); } + } + std::vector> rwgtCard::writeCards( REX::lesHouchesCard& slhaOrig ){ + std::vector> cardVec; + slhaOrig.parse(); + cardVec.reserve( rwgtRuns.size() ); + for( auto rwgt : rwgtRuns ) + { + cardVec.push_back( rwgt.outWrite( slhaOrig ) ); + } + return cardVec; + } + + void rwgtCollection::setRwgt( std::shared_ptr rwgts ){ + if( rwgtSet ){ return; } + rwgtSets = rwgts; + rwgtSet = true; + } + void rwgtCollection::setRwgt( rwgtCard rwgts ){ + if( rwgtSet ){ return; } + setRwgt( std::make_shared( rwgts ) ); rwgtSet = true; + } + void rwgtCollection::setSlha( std::shared_ptr slha ){ + if( slhaSet ){ return; } + slhaParameters = slha; + slhaParameters->parse(); + slhaSet = true; + } + void rwgtCollection::setSlha( 
REX::lesHouchesCard slha ){ + if( slhaSet ){ return; } + setSlha( std::make_shared( slha ) ); + slhaSet = true; + } + void rwgtCollection::setLhe( std::shared_ptr lhe ){ + if( lheFileSet ){ return; } + lheFile = lhe; + lheFileSet = true; + } + void rwgtCollection::setLhe( REX::lheNode& lhe ){ + if( lheFileSet ){ return; } + setLhe( std::make_shared( lhe ) ); + lheFileSet = true; + } + void rwgtCollection::setLhe( std::string_view lhe_file ){ + if( lheFileSet ){ return; } + //lheFile = REX::lheParser( lhe_file, strt, post ); + lheFile = std::make_shared( REX::lheNode(lhe_file) ); + lheFileSet = true; + } + std::shared_ptr rwgtCollection::getRwgt(){ return rwgtSets; } + std::shared_ptr rwgtCollection::getSlha(){ return slhaParameters; } + std::shared_ptr rwgtCollection::getLhe(){ return lheFile; } + rwgtCollection::rwgtCollection(){ return; } + rwgtCollection::rwgtCollection( std::shared_ptr lhe, std::shared_ptr slha, std::shared_ptr rwgts ){ + setLhe( lhe ); + setSlha( slha ); + setRwgt( rwgts ); + } + REX::transSkel& rwgtCollection::getSkeleton(){ + if( !this->skeleton ) + throw std::runtime_error( "Skeleton has not been set." ); + return this->lheSkeleton; + } + REX::transSkel& rwgtCollection::getSkeleton( std::vector& evSets ){ + if( this->skeleton ){ return this->lheSkeleton; } + setSkeleton( evSets ); + return this->lheSkeleton; + } + template + void rwgtCollection::setDoubles(Args&&... args){ + if( lheFile == nullptr || rwgtSets == nullptr || slhaParameters == nullptr ) + throw std::runtime_error( "One or more of the necessary files (SLHA parameter card, LHE event storage file, and MadGraph-format reweight card) have not been initialised." ); + REX::lheRetDs returnBools; returnBools.xwgtup = true; returnBools.aqcdup = true; returnBools.pup = true; + eventFile = REX::transLHE( *lheFile, args... 
); + auto vecOfVecs = REX::lheValDoubles( eventFile, returnBools ); + if( vecOfVecs->size() != 3 * eventFile.subProcs.size() ) + throw std::runtime_error( "Incorrect number of parameters have been extracted from the LHE file." ); + //wgts[0] = vecOfVecs->at( 0 ); gS[0] = vecOfVecs->at( 1 ); momenta[0] = vecOfVecs->at( 2 ); + for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + wgts.push_back( vecOfVecs->at( 3*k ) ); + gS.push_back( vecOfVecs->at( 3*k + 1 ) ); + momenta.push_back( vecOfVecs->at( 3*k + 2 ) ); + } + } + void rwgtCollection::setSkeleton( std::vector& evSets ){ + if( lheFile == nullptr || rwgtSets == nullptr || slhaParameters == nullptr ) + throw std::runtime_error( "One or more of the necessary files (SLHA parameter card, LHE event storage file, and MadGraph-format reweight card) have not been initialised." ); + this->lheSkeleton = transSkel( lheFile, evSets ); + this->skeleton = true; + } + void rwgtCollection::setDoublesFromSkeleton(){ + if( !this->skeleton ) + throw std::runtime_error( "Skeleton has not been set." ); + REX::lheRetDs returnBools; returnBools.xwgtup = true; returnBools.aqcdup = true; returnBools.pup = true; + this->eventFile = REX::transLHE( this->lheSkeleton ); + auto vecOfVecs = REX::lheValDoubles( eventFile, returnBools ); + if( vecOfVecs->size() != 3 * eventFile.subProcs.size() ) + throw std::runtime_error( "Incorrect number of parameters have been extracted from the LHE file." 
); + for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + wgts.push_back( vecOfVecs->at( 3*k ) ); + gS.push_back( vecOfVecs->at( 3*k + 1 ) ); + momenta.push_back( vecOfVecs->at( 3*k + 2 ) ); + } + } + + void rwgtFiles::setRwgtPath( std::string_view path ){ rwgtPath = path; } + void rwgtFiles::setSlhaPath( std::string_view path ){ slhaPath = path; } + void rwgtFiles::setLhePath( std::string_view path ){ lhePath = path; } + rwgtFiles::rwgtFiles() : rwgtCollection(){ return; } + rwgtFiles::rwgtFiles( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ) : rwgtCollection(){ + setRwgtPath( reweight_card ); + setSlhaPath( slha_card ); + setLhePath( lhe_card ); + } + REX::transSkel& rwgtFiles::initCards( std::vector& evSets ){ + if( rwgtPath == "" || slhaPath == "" || lhePath == "" ) + throw std::runtime_error( "Paths to reweight card, parameter card, or LHE file have not been set" ); + this->pullRwgt(); this->pullSlha(); this->pullLhe(); + this->setLhe( *lheCard ); + this->setSlha( std::make_shared( *slhaCard ) ); + this->setRwgt( std::make_shared( *rewgtCard, *slhaParameters, true ) ); + return this->getSkeleton( evSets ); + } + template + void rwgtFiles::initCards(Args&&... args){ + if( rwgtPath == "" || slhaPath == "" || lhePath == "" ) + throw std::runtime_error( "Paths to reweight card, parameter card, or LHE file have not been set" ); + pullRwgt(); pullSlha(); pullLhe(); + setLhe( *lheCard ); + setSlha( std::make_shared( *slhaCard ) ); + setRwgt( std::make_shared( *rewgtCard, *slhaParameters, true ) ); + setDoubles(args...); + } + template + void rwgtFiles::initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, Args&&... args ){ + setLhePath( lhe_card ); + setSlhaPath( slha_card ); + setRwgtPath( reweight_card ); + initCards(args...); + } + void rwgtFiles::initDoubles(){ + if( !this->skeleton ) + throw std::runtime_error( "Skeleton has not been set." 
); + this->setDoublesFromSkeleton(); + } + void rwgtFiles::pullRwgt(){ + rewgtCard = REX::filePuller( rwgtPath ); + } + void rwgtFiles::pullSlha(){ + slhaCard = REX::filePuller( slhaPath ); + } + void rwgtFiles::pullLhe(){ + lheCard = REX::filePuller( lhePath ); + } + + void rwgtRunner::setMeEval( amplitude eval ){ + meEval = eval; meInit = true; +// ampCall nuEvals; +// nuEvals.insert( std::pair( *eventFile.subProcs[0]->process, eval ) ); +// meEvals = nuEvals; + } +// void rwgtRunner::setMeEvals( ampCall evals ){ meEvals = evals; meCompInit = true; } + void rwgtRunner::addMeEval( const REX::event& ev, const amplitude& eval ){}// meEvals.insert( std::pair( ev, eval ) ); meCompInit = true; } + rwgtRunner::rwgtRunner() : rwgtFiles(){ return; } + rwgtRunner::rwgtRunner( rwgtFiles& rwgts ) : rwgtFiles( rwgts ){ return; } + rwgtRunner::rwgtRunner( rwgtFiles& rwgts, amplitude meCalc ) : rwgtFiles( rwgts ){ + meEval = meCalc; + meInit = true; + } + // rwgtRunner::rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ) : rwgtFiles( rwgts ){ + // meEvals = meCalcs; + // meCompInit = true; + // } + rwgtRunner::rwgtRunner( rwgtFiles& rwgts, std::vector& meCalcs ) : rwgtFiles( rwgts ){ + meVec = meCalcs; + meCompInit = true; + } + rwgtRunner::rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + amplitude meCalc ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ + meEval = meCalc; + meInit = true; + } + // rwgtRunner::rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + // ampCall meCalcs ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ + // meEvals = meCalcs; + // meCompInit = true; + // } + bool rwgtRunner::oneME(){ return (meInit != meCompInit); } + bool rwgtRunner::singAmp(){ return (meInit && !meCompInit); } + template + void rwgtRunner::setMEs(Args&&... 
args){ + initCards(args...); + if( !oneME() ) + throw std::runtime_error( "No or multiple function(s) for evaluating scattering amplitudes has been provided." ); + //ZW FIX THIS + initMEs = {}; + if( meVec.size() != 0 ){ + for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + auto ins = meVec[k]( *(momenta[k]), *(gS[k]) ); + initMEs.push_back( std::make_shared>( ins->begin(), ins->begin() + wgts[k]->size() ) ); + } + } + else{ + // for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) + // { + // auto ins = meEvals[*(eventFile.subProcs[k]->process)]( *(momenta[k]), *(gS[k]) ); + // initMEs.push_back( std::make_shared>( ins->begin(), ins->begin() + wgts[k]->size() ) ); + // } + } + //auto ins = meEval( *(momenta[0]), *(gS[0]) ); + //initMEs = {std::make_shared>( ins->begin(), ins->begin() + wgts[0]->size() )}; + meSet = true; + } + bool rwgtRunner::setParamCard( std::shared_ptr slhaParams ){ + if( slhaPath == "" ) + throw std::runtime_error( "No parameter card path has been provided." ); + if( slhaParameters == nullptr ) + throw std::runtime_error( "No SLHA parameter card has been provided." ); + if( !REX::filePusher( slhaPath, *slhaParams->selfWrite() ) ) + throw std::runtime_error( "Failed to overwrite parameter card." ); + return true; + } + void rwgtRunner::setNormWgtsSingleME(){ + //if( initMEs->size() != wgts[0]->size() ) + // throw std::runtime_error( "Inconsistent number of events and event weights." 
); + meNormWgts = {std::make_shared>( wgts[0]->size() )}; + for( size_t k = 0; k < initMEs[0]->size(); k++ ){ + meNormWgts[0]->at( k ) = wgts[0]->at( k ) / initMEs[0]->at( k ); + } + normWgt = meNormWgts[0]; + } + void rwgtRunner::setNormWgtsMultiME(){ + meNormWgts = std::vector>>( initMEs.size() ); + for( size_t k = 0 ; k < wgts.size() ; ++k ){ + meNormWgts[k] = std::make_shared>( wgts[k]->size() ); + for( size_t i = 0 ; i < wgts[k]->size() ; ++i ){ + meNormWgts[k]->at( i ) = wgts[k]->at( i ) / initMEs[k]->at( i ); + } + } + normWgt = eventFile.vectorFlat( meNormWgts ); + } + template + void rwgtRunner::setNormWgts(Args&&... args){ + if( !oneME() ){ setMEs(args...); } + //if( initMEs->size() != wgts[0]->size() ) + // throw std::runtime_error( "Inconsistent number of events and event weights." ); + for( size_t k = 0; k < initMEs.size() ; ++k ){ + if( initMEs[k]->size() != wgts[k]->size() ) + throw std::runtime_error( "Inconsistent number of events and event weights." ); + } + if( initMEs.size() == 1 ){ setNormWgtsSingleME(); } + else { setNormWgtsMultiME(); } + normWgtSet = true; + } + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheIn, size_t currId ){ + if( !normWgtSet ) + throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); + if( !setParamCard( slhaParams ) ) + throw std::runtime_error( "Failed to rewrite parameter card." 
); + std::shared_ptr> newWGTs; + if( singAmp() ){ + auto newMEs = meEval( *momenta[0], *gS[0] ); + newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); + } + else{ + std::vector>> nuMEs = {}; + // for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) + // { + // nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); + // } + std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); + newWGTs = REX::vecElemMult( *newMEs, *normWgt ); + } + //ZW IF MULTIPLE TYPES + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); + lheIn->addWgt( 0, nuWgt ); + return true; + } + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheIn, size_t currId, std::string& id ){ + if( !normWgtSet ) + throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); + if( !setParamCard( slhaParams ) ) + throw std::runtime_error( "Failed to rewrite parameter card." ); + std::shared_ptr> newWGTs; + if( singAmp() ){ + auto newMEs = meEval( *momenta[0], *gS[0] ); + newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); + } + else{ + std::vector>> nuMEs = {}; + // for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) + // { + // nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); + // } + std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); + newWGTs = REX::vecElemMult( *newMEs, *normWgt ); + } + //ZW IF MULTIPLE TYPES + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); + lheIn->addWgt( 0, nuWgt ); + return true; + } + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheIn, size_t currId, REX::event& ev ){ + if( !normWgtSet ) + throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); + if( !setParamCard( slhaParams ) ) + throw std::runtime_error( "Failed to rewrite parameter card." 
); + //auto newMEs = meEval( *momenta, *gS ); + std::shared_ptr> newWGTs; + if( singAmp() ){ + auto newMEs = meEval( *momenta[0], *gS[0] ); + newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); + } + else{ + std::vector>> nuMEs = {}; + // for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) + // { + // nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); + // } + std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); + newWGTs = REX::vecElemMult( *newMEs, *normWgt ); + } + //ZW IF MULTIPLE TYPES + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); + lheIn->addWgt( 0, nuWgt ); + return true; + } + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheIn, size_t currId, + std::string& id, REX::event& ev ){ + if( !normWgtSet ) + throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); + if( !setParamCard( slhaParams ) ) + throw std::runtime_error( "Failed to rewrite parameter card." ); + std::shared_ptr> newWGTs; + if( singAmp() ){ + auto newMEs = meEval( *momenta[0], *gS[0] ); + newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); + } + else{ + std::vector>> nuMEs = {}; + // for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) + // { + // nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); + // } + std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); + newWGTs = REX::vecElemMult( *newMEs, *normWgt ); + } + //ZW IF MULTIPLE TYPES + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); + lheIn->addWgt( 0, nuWgt ); + return true; + } + bool rwgtRunner::lheFileWriter( std::shared_ptr lheIn, std::string outputDir ){ + bool writeSuccess = REX::filePusher( outputDir, *lheIn->nodeWriter() ); + if( !writeSuccess ) + throw std::runtime_error( "Failed to write LHE file." 
); + return true; + } + void rwgtRunner::runRwgt( const std::string& output ){ + setMEs(); + setNormWgts(); + rwgtGroup = std::make_shared(); + auto currInd = lheFile->getHeader()->addWgtGroup( rwgtGroup ); + auto paramSets = rwgtSets->writeCards( *slhaParameters ); + for( size_t k = 0 ; k < paramSets.size(); k++ ){ + singleRwgtIter( paramSets[k], lheFile, k, rwgtSets->rwgtNames[k] ); + std::cout << "."; + } + lheFileWriter( lheFile, output ); + REX::filePusher( slhaPath, *slhaCard ); + std::cout << "\nReweighting done.\n"; + } + + void rwgtRun( rwgtRunner& rwgt, const std::string& path ){ + rwgt.runRwgt( path ); + } +} + +#endif diff --git a/tools/REX/teawREX.h b/tools/REX/teawREX.h index a865db4944..7c1120c5b9 100644 --- a/tools/REX/teawREX.h +++ b/tools/REX/teawREX.h @@ -31,8 +31,8 @@ namespace REX::teaw { using amplitude = std::function>(std::vector&, std::vector&)>; - using ampCall = std::map; - using ampPair = std::pair; + //using ampCall = std::map; + //using ampPair = std::pair; using vecMap = std::map>, REX::eventComp>; struct rwgtVal : REX::paramVal{ @@ -98,11 +98,15 @@ namespace REX::teaw std::shared_ptr getRwgt(); std::shared_ptr getSlha(); std::shared_ptr getLhe(); + REX::transSkel& getSkeleton(); + REX::transSkel& getSkeleton( std::vector& evSets ); rwgtCollection(); rwgtCollection( std::shared_ptr lhe, std::shared_ptr slha, std::shared_ptr rwgts ); protected: template void setDoubles(Args&&... 
args); + void setSkeleton( std::vector& evSets); + void setDoublesFromSkeleton(); std::shared_ptr rwgtSets; std::shared_ptr slhaParameters; std::shared_ptr lheFile; @@ -112,7 +116,9 @@ namespace REX::teaw bool lheFileSet = false; bool slhaSet = false; bool rwgtSet = false; + bool skeleton = false; REX::transLHE eventFile; + REX::transSkel lheSkeleton; }; struct rwgtFiles : rwgtCollection { @@ -121,6 +127,8 @@ namespace REX::teaw void setLhePath( std::string_view path ); rwgtFiles(); rwgtFiles( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ); + REX::transSkel& initCards( std::vector& evSets); + void initDoubles(); template void initCards(Args&&... args); template @@ -140,16 +148,17 @@ namespace REX::teaw struct rwgtRunner : rwgtFiles{ public: void setMeEval( amplitude eval ); - void setMeEvals( ampCall evals ); + //void setMeEvals( ampCall evals ); void addMeEval( const REX::event& ev, const amplitude& eval ); rwgtRunner(); rwgtRunner( rwgtFiles& rwgts ); rwgtRunner( rwgtFiles& rwgts, amplitude meCalc ); - rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ); + //rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ); + rwgtRunner( rwgtFiles& rwgts, std::vector& meCalcs ); rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, amplitude meCalc ); - rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, - ampCall meCalcs ); + //rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, + //ampCall meCalcs ); bool oneME(); bool singAmp(); protected: @@ -158,7 +167,8 @@ namespace REX::teaw bool meSet = false; bool normWgtSet = false; amplitude meEval; - ampCall meEvals; + //ampCall meEvals; + std::vector meVec; std::vector>> initMEs; std::vector>> meNormWgts; std::shared_ptr> normWgt; diff --git a/tools/REX/teawREX.hpp b/tools/REX/teawREX.hpp index ae9efff776..55740d4aac 100644 --- a/tools/REX/teawREX.hpp +++ 
b/tools/REX/teawREX.hpp @@ -122,7 +122,7 @@ namespace REX::teaw for( auto line : procLines ) { auto strtPt = line.find("set"); - auto words = *REX::nuWordSplitter( line ); + auto words = *REX::nuWordSplitter( line.substr(strtPt) ); auto currBlock = words[1]; auto loc = std::find_if( blocks.begin(), blocks.end(), [&]( std::string_view block ){ return (block == currBlock); } ); @@ -165,7 +165,8 @@ namespace REX::teaw void rwgtCard::parse( bool parseOnline ) { auto strt = srcCard.find("launch"); - while( auto commPos = srcCard.find_last_of("#", strt) > srcCard.find_last_of("\n", strt) ){ + auto commPos = srcCard.find_last_of("#", strt); + while( commPos > srcCard.find_last_of("\n", strt) ){ if( commPos == REX::npos ){ break; } @@ -376,7 +377,7 @@ namespace REX::teaw throw std::runtime_error( "No or multiple function(s) for evaluating scattering amplitudes has been provided." ); //ZW FIX THIS initMEs = {}; - for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) { auto ins = meEvals[*(eventFile.subProcs[k]->process)]( *(momenta[k]), *(gS[k]) ); initMEs.push_back( std::make_shared>( ins->begin(), ins->begin() + wgts[k]->size() ) ); @@ -405,9 +406,9 @@ namespace REX::teaw } void rwgtRunner::setNormWgtsMultiME(){ meNormWgts = std::vector>>( initMEs.size() ); - for( auto k = 0 ; k < wgts.size() ; ++k ){ + for( size_t k = 0 ; k < wgts.size() ; ++k ){ meNormWgts[k] = std::make_shared>( wgts[k]->size() ); - for( auto i = 0 ; i < wgts[k]->size() ; ++i ){ + for( size_t i = 0 ; i < wgts[k]->size() ; ++i ){ meNormWgts[k]->at( i ) = wgts[k]->at( i ) / initMEs[k]->at( i ); } } @@ -418,7 +419,7 @@ namespace REX::teaw if( !oneME() ){ setMEs(args...); } //if( initMEs->size() != wgts[0]->size() ) // throw std::runtime_error( "Inconsistent number of events and event weights." 
); - for( auto k = 0; k < initMEs.size() ; ++k ){ + for( size_t k = 0; k < initMEs.size() ; ++k ){ if( initMEs[k]->size() != wgts[k]->size() ) throw std::runtime_error( "Inconsistent number of events and event weights." ); } @@ -426,7 +427,7 @@ namespace REX::teaw else { setNormWgtsMultiME(); } normWgtSet = true; } - bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId ){ + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheIn, size_t currId ){ if( !normWgtSet ) throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); if( !setParamCard( slhaParams ) ) @@ -438,7 +439,7 @@ namespace REX::teaw } else{ std::vector>> nuMEs = {}; - for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) { nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); } @@ -447,10 +448,10 @@ namespace REX::teaw } //ZW IF MULTIPLE TYPES REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); - lheFile->addWgt( 0, nuWgt ); + lheIn->addWgt( 0, nuWgt ); return true; } - bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, std::string& id ){ + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheIn, size_t currId, std::string& id ){ if( !normWgtSet ) throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." 
); if( !setParamCard( slhaParams ) ) @@ -462,7 +463,7 @@ namespace REX::teaw } else{ std::vector>> nuMEs = {}; - for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) { nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); } @@ -471,10 +472,10 @@ namespace REX::teaw } //ZW IF MULTIPLE TYPES REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); - lheFile->addWgt( 0, nuWgt ); + lheIn->addWgt( 0, nuWgt ); return true; } - bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, REX::event& ev ){ + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheIn, size_t currId, REX::event& ev ){ if( !normWgtSet ) throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); if( !setParamCard( slhaParams ) ) @@ -487,7 +488,7 @@ namespace REX::teaw } else{ std::vector>> nuMEs = {}; - for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) { nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); } @@ -496,10 +497,10 @@ namespace REX::teaw } //ZW IF MULTIPLE TYPES REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); - lheFile->addWgt( 0, nuWgt ); + lheIn->addWgt( 0, nuWgt ); return true; } - bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId, + bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheIn, size_t currId, std::string& id, REX::event& ev ){ if( !normWgtSet ) throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." 
); @@ -512,7 +513,7 @@ namespace REX::teaw } else{ std::vector>> nuMEs = {}; - for( auto k = 0 ; k < eventFile.subProcs.size() ; ++k ) + for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) { nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); } @@ -521,11 +522,11 @@ namespace REX::teaw } //ZW IF MULTIPLE TYPES REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); - lheFile->addWgt( 0, nuWgt ); + lheIn->addWgt( 0, nuWgt ); return true; } - bool rwgtRunner::lheFileWriter( std::shared_ptr lheFile, std::string outputDir ){ - bool writeSuccess = REX::filePusher( outputDir, *lheFile->nodeWriter() ); + bool rwgtRunner::lheFileWriter( std::shared_ptr lheIn, std::string outputDir ){ + bool writeSuccess = REX::filePusher( outputDir, *lheIn->nodeWriter() ); if( !writeSuccess ) throw std::runtime_error( "Failed to write LHE file." ); return true; From ff6b3e1fc6637da4e0fd6fbaefa18655df79dcbc Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Wed, 7 Aug 2024 22:05:48 +0200 Subject: [PATCH 19/76] added final necessary functionality for a library based implementation of teawREX --- .gitmodules | 4 + .../iolibs/template_files/gpu/Bridge.h | 6 +- .../template_files/gpu/cudacpp_runner.mk | 4 +- tools/REX/REX.cc | 39 ++++- tools/REX/REX.h | 2 +- tools/REX/rwgt_instance.cc | 16 +- tools/REX/rwgt_runner.cc | 4 +- tools/REX/teawREX.cc | 158 +++++++++++------- 8 files changed, 162 insertions(+), 71 deletions(-) diff --git a/.gitmodules b/.gitmodules index 6fbb5110b6..7ce3f44a13 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,3 +2,7 @@ path = MG5aMC/mg5amcnlo url = https://github.com/zeniheisser/mg5amcnlo/ branch = rexCPP +[submodule "MG5aMC/"] + url = git@github.com:zeniheisser/mg5amcnlo.git +[submodule "MG5aMC"] + url = git@github.com:zeniheisser/mg5amcnlo.git diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h index 4bf2198dd1..b030fc65f2 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h @@ -261,7 +261,11 @@ namespace mg5amcCpu // FIXME: the process instance can happily go out of scope because it is only needed to read parameters? // FIXME: the CPPProcess should really be a singleton? what if fbridgecreate is called from several Fortran threads? CPPProcess process( /*verbose=*/false ); - std::string paramCard = "../../Cards/param_card.dat"; +//#ifdef _LIBCOMP_ + std::string paramCard = "../Cards/param_card.dat"; +//#else +// std::string paramCard = "../../Cards/param_card.dat"; +//#endif /* #ifdef __HIPCC__ if( !std::experimental::filesystem::exists( paramCard ) ) paramCard = "../" + paramCard; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk index 98a5a289e9..d8be8cd4e8 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk @@ -628,9 +628,9 @@ endif # First target (default goal) ifeq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_checkmain) $(cxx_fcheckmain) $(cxx_rwgtlib) $(if $(GTESTLIBS),$(cxx_testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_rwgtlib) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_checkmain) $(gpu_fcheckmain) $(gpu_rwgtlib) $(if $(GTESTLIBS),$(gpu_testmain)) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_rwgtlib) endif # 
Target (and build options): debug diff --git a/tools/REX/REX.cc b/tools/REX/REX.cc index b321052467..4b193c39fb 100644 --- a/tools/REX/REX.cc +++ b/tools/REX/REX.cc @@ -497,6 +497,9 @@ namespace REX size_t trueStart = xmlFile.find_first_not_of("< \n\r\f\t\v", start+1); name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ \n\r\f\t\v", trueStart) - trueStart ); content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); + for( auto child : *structure.getChildren() ){ + children.push_back( std::make_shared(*child) ); + } for( auto child : childs ){ children.push_back( child ); } @@ -602,8 +605,9 @@ namespace REX } void xmlNode::endWriter() { if( isFaux() ){ return; } - auto endSt = xmlFile.find_last_of("<", end); - nodeEnd = xmlFile.substr( endSt, end - endSt ); + //auto endSt = xmlFile.find_last_of("<", end); + //nodeEnd = xmlFile.substr( endSt, end - endSt ); + nodeEnd = "\n"; } void xmlNode::contWriter() { if( hasChildren() ){ @@ -1496,6 +1500,7 @@ namespace REX nodeContent = "\n" + *header.getContent(); for( auto prt : prts ){ nodeContent += *prt->getContent(); + if( nodeContent.back() != '\n' ){ nodeContent += "\n"; } } } void event::childWriter() { @@ -3246,7 +3251,35 @@ namespace REX } } } - transSkel::transSkel( std::shared_ptr lheFile, std::vector& evSet ) : transSkel(*lheFile, evSet){}; + transSkel::transSkel( std::shared_ptr lheFile, std::vector& evSet ) { + this->relProcs = evProcOrder( *lheFile, evSet ); + this->relEvSet = std::vector(evSet.size(), false); + for ( size_t k = 0 ; k < this->relProcs.size() ; ++k ) + { + if( std::find(this->relProcs[k]->begin(), this->relProcs[k]->end(), true) != this->relProcs[k]->end() ) + { + this->relEvSet[k] = true; + } + } + this->procSets = std::vector>>(std::count(this->relEvSet.begin(), this->relEvSet.end(), true)); + auto evs = lheFile->getEvents(); + size_t j = 0; + for( size_t k = 0 ; k < this->relEvSet.size() ; ++k ) + { + if( this->relEvSet[k] ) + { + 
this->procSets[j] = std::vector>(); + for( size_t m = 0 ; m < relProcs[k]->size() ; ++m ) + { + if( relProcs[k]->at(m) ) + { + this->procSets[j].push_back(evs[m]); + } + } + ++j; + } + } + } // ZW: transposed LHE file with a single process type transMonoLHE::transMonoLHE( const std::vector> lheFile , const int nPrt ){ diff --git a/tools/REX/REX.h b/tools/REX/REX.h index 94344bb193..bc3312d293 100644 --- a/tools/REX/REX.h +++ b/tools/REX/REX.h @@ -85,7 +85,7 @@ namespace REX xmlTree(); xmlTree( std::string_view file ); xmlTree( std::string_view file, size_t& strt, size_t& nd ); - auto& getChildren(){ return children; } + auto getChildren(){ return children; } std::string_view& getOrigin(){ return origin; } size_t getStart(){ return start; } size_t getEnd(){ return end; } diff --git a/tools/REX/rwgt_instance.cc b/tools/REX/rwgt_instance.cc index a791cd45a4..eb8d05ae44 100644 --- a/tools/REX/rwgt_instance.cc +++ b/tools/REX/rwgt_instance.cc @@ -25,13 +25,17 @@ namespace rwgt{ //ZW: Function for padding the input arrays to a multiple of the warp size template - std::vector& warpPad( std::vector& input, unsigned int nWarp = 32 ){ + void warpPad( std::vector& input, unsigned int nWarp = 32 ){ auto nEvt = input.size(); auto nWarpRemain = warpRemain( nEvt, nWarp ); auto fauxNEvt = nEvt + nWarpRemain; - auto output = std::vector( fauxNEvt ); - std::copy( input.begin(), input.end(), output.begin()); - return output; +// auto output = std::vector( fauxNEvt ); +// std::copy( input.begin(), input.end(), output.begin()); +// input.resize( fauxNEvt ); + for( size_t k = nEvt - nWarpRemain ; k < fauxNEvt ; ++k ){ + input.push_back( input[k] ); + } + return; } fBridge::fBridge(){} @@ -133,8 +137,8 @@ namespace rwgt{ // } // } if( this->bridge == nullptr) throw std::runtime_error("fBridge object not defined."); - alphaS = warpPad( alphaS, nWarp ); - momenta = warpPad( momenta, nWarp * nPar * nMom ); + warpPad( alphaS, nWarp ); + warpPad( momenta, nWarp * nPar * nMom ); auto 
evalScatAmps = this->bridge(fauxNEvt, nPar, nMom, momenta, alphaS, rndHel, rndCol, selHel, selCol, chanId ); alphaS.resize( nEvt ); momenta.resize( nEvt * nPar * nMom ); diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index 59c3801103..e0be856db4 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -11,7 +11,9 @@ // A class for reweighting matrix elements for %(process_lines)s //-------------------------------------------------------------------------- - +#ifndef _LIBCOMP_ +#define _LIBCOMP_ +#endif #include "rwgt_instance.h" #include "fbridge.cc" diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc index 6a3d7e87a0..3b762eaa21 100644 --- a/tools/REX/teawREX.cc +++ b/tools/REX/teawREX.cc @@ -121,7 +121,9 @@ namespace REX::teaw auto procLines = *REX::nuLineSplitter( procString ); for( auto line : procLines ) { + if( line.find_first_not_of(" \n\r\f\t\v") == '#' ){ continue; } auto strtPt = line.find("set"); + if( strtPt == REX::npos ){ continue; } auto words = *REX::nuWordSplitter( line.substr(strtPt) ); auto currBlock = words[1]; auto loc = std::find_if( blocks.begin(), blocks.end(), @@ -142,11 +144,11 @@ namespace REX::teaw rwgtProc::rwgtProc( REX::lesHouchesCard slhaSet, std::string_view rwgtSet, bool parseOnline ) { if( rwgtSet == "" ){ return; } - auto strtLi = rwgtSet.find( "\n", rwgtSet.find("launch") ) + 1; - auto endLi = rwgtSet.find("\n", strtLi); - while( rwgtSet[rwgtSet.find_first_not_of("\n ", endLi)] == 's' ) - { endLi = rwgtSet.find( "\n", endLi + 1 ); } - procString = rwgtSet.substr( strtLi, endLi - strtLi ); + auto strtLi = rwgtSet.find_first_not_of( " \n\r\f\n\v" ); + if( strtLi == REX::npos ){ return; } + auto launchPos = rwgtSet.find("launch", strtLi + 1); + auto commLinePos = rwgtSet.find("#*", strtLi + 1); + procString = rwgtSet.substr( strtLi, std::min(launchPos, commLinePos) - strtLi ); if( parseOnline ){ parse(); } } std::shared_ptr rwgtProc::outWrite( const REX::lesHouchesCard& paramOrig ){ @@ -163,62 
+165,104 @@ namespace REX::teaw } std::string_view rwgtProc::comRunProc(){ return procString; } - void rwgtCard::parse( bool parseOnline ) { - auto strt = srcCard.find("launch"); - auto commPos = srcCard.find_last_of("#", strt); - while( commPos > srcCard.find_last_of("\n", strt) ){ - if( commPos == REX::npos ){ - break; - } - strt = srcCard.find("launch", strt + 6 ); - } - while( auto chPos = srcCard.find( "set" ) < strt ){ - if( srcCard.find_last_of("#", chPos) > srcCard.find_last_of("\n", chPos) ){ chPos = srcCard.find("change", strt + 6 ); continue; } - opts.push_back( srcCard.substr( chPos, srcCard.find("\n", chPos) - chPos ) ); - } - std::vector lnchPos({strt}); - auto nuLnch = srcCard.find( "launch", strt + 6 ); - while ( nuLnch != std::string_view::npos ) + // void rwgtCard::parse( bool parseOnline ) { + // auto strt = srcCard.find("launch"); + // auto commPos = srcCard.find_last_of("#", strt); + // while( commPos > srcCard.find_last_of("\n", strt) ){ + // if( commPos == REX::npos ){ + // break; + // } + // strt = srcCard.find("launch", strt + 6 ); + // } + // while( auto chPos = srcCard.find( "set" ) < strt ){ + // if( srcCard.find_last_of("#", chPos) > srcCard.find_last_of("\n", chPos) ){ chPos = srcCard.find("change", strt + 6 ); continue; } + // opts.push_back( srcCard.substr( chPos, srcCard.find("\n", chPos) - chPos ) ); + // } + // std::vector lnchPos({strt}); + // auto nuLnch = srcCard.find( "launch", strt + 6 ); + // while ( nuLnch != std::string_view::npos ) + // { + // if( srcCard.find_last_of("#", nuLnch) < srcCard.find_last_of("\n", nuLnch) ){ lnchPos.push_back(nuLnch); } + // nuLnch = srcCard.find( "launch", nuLnch + 6 ); + // } + // for( size_t k = 0 ; k < lnchPos.size() - 1 ; ++k ) + // { + // auto strtLi = srcCard.find( "set", lnchPos[k] ); + // rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( strtLi, lnchPos[k+1] - strtLi ), parseOnline ) ); + // if( srcCard.find( "--", lnchPos[k] ) < strtLi ){ + // auto strtPos = srcCard.find( "--", 
lnchPos[k] ); + // while( (strtPos < strtLi ) && (strtPos!= std::string_view::npos) ){ + // auto nuStrtPos = std::min( srcCard.find( "\n", strtPos ), srcCard.find( "--", strtPos + 1 )); + // rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.push_back( srcCard.substr( strtPos, nuStrtPos - strtPos ) ); + // if( rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr(2,11) == "rwgt_name"){ + // rwgtRuns[ rwgtRuns.size() - 1 ].rwgtName = rwgtRuns[ rwgtRuns.size() - 1 ]. + // rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr( 11, nuStrtPos - strtPos - 11 ); + // } + // if( nuStrtPos == srcCard.find( "\n", strtPos ) ){ break; } + // strtPos = nuStrtPos; + // } + // } + // } + // size_t endLi = srcCard.find( "\n", lnchPos[ lnchPos.size() - 1 ] ); + // if( srcCard.substr( endLi + 1, 3 ) == "set" ){ + // while( srcCard.substr( endLi + 1, 3 ) == "set" ) + // { + // endLi = srcCard.find( "\n", endLi + 1 ); + // } + // rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( lnchPos[lnchPos.size()-1], endLi - lnchPos[lnchPos.size()-1] ), parseOnline ) ); + // } + // rwgtProcs = std::vector(); rwgtProcs.reserve( rwgtRuns.size() ); + // rwgtNames.reserve( rwgtRuns.size() ); + // int p = 1; + // for( auto run : rwgtRuns ){ + // rwgtProcs.push_back( run.comRunProc() ); + // if( run.rwgtName == "" ){ + // rwgtNames.push_back( "rwgt_" + std::to_string( p++ ) ); + // } else { + // rwgtNames.push_back( std::string(run.rwgtName) ); + // } + // } + // } + void rwgtCard::parse( bool parseOnline ){ + auto allLaunchPos = REX::nuFindEach( this->srcCard, "launch" ); + std::vector lnchPos; + lnchPos.reserve( allLaunchPos->size() ); + for( auto pos : *allLaunchPos ) { - if( srcCard.find_last_of("#", nuLnch) < srcCard.find_last_of("\n", nuLnch) ){ lnchPos.push_back(nuLnch); } - nuLnch = srcCard.find( "launch", nuLnch + 6 ); + if( pos == 0 ){ lnchPos.push_back(pos); continue; } + if( srcCard.find_last_of("#", pos) < 
srcCard.find_last_of("\n", pos) ){ lnchPos.push_back(pos); } } - for( size_t k = 0 ; k < lnchPos.size() - 1 ; ++k ) + lnchPos.push_back( REX::npos ); + auto preamble = REX::nuLineSplitter( srcCard.substr( 0, lnchPos[0] - 1 ) ); + for( auto line : *preamble ) { - auto strtLi = srcCard.find( "set", lnchPos[k] ); - rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( strtLi, lnchPos[k+1] - strtLi ), parseOnline ) ); - if( srcCard.find( "--", lnchPos[k] ) < strtLi ){ - auto strtPos = srcCard.find( "--", lnchPos[k] ); - while( (strtPos < strtLi ) && (strtPos!= std::string_view::npos) ){ - auto nuStrtPos = std::min( srcCard.find( "\n", strtPos ), srcCard.find( "--", strtPos + 1 )); - rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.push_back( srcCard.substr( strtPos, nuStrtPos - strtPos ) ); - if( rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr(2,11) == "rwgt_name"){ - rwgtRuns[ rwgtRuns.size() - 1 ].rwgtName = rwgtRuns[ rwgtRuns.size() - 1 ]. 
- rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr( 11, nuStrtPos - strtPos - 11 ); - } - if( nuStrtPos == srcCard.find( "\n", strtPos ) ){ break; } - strtPos = nuStrtPos; - } - } + if( line[line.find_first_not_of(" \n\r\f\t\v")] == '#' ){ continue; } + opts.push_back( line ); } - size_t endLi = srcCard.find( "\n", lnchPos[ lnchPos.size() - 1 ] ); - if( srcCard.substr( endLi + 1, 3 ) == "set" ){ - while( srcCard.substr( endLi + 1, 3 ) == "set" ) - { - endLi = srcCard.find( "\n", endLi + 1 ); + for( size_t k = 0 ; k < lnchPos.size() - 1 ; ++k ){ + auto setPos = srcCard.find( "set", lnchPos[k] ); + if( setPos == REX::npos ){ continue; } + rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( setPos, lnchPos[k+1] - setPos ), parseOnline ) ); + auto possNamePos = srcCard.find_first_of( "-\n#", lnchPos[k] ); + if( srcCard[possNamePos] == '-' ){ + auto endLine = srcCard.find( "\n", possNamePos ); + auto opts = srcCard.substr( possNamePos, endLine - possNamePos ); + rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.push_back( opts ); + auto namePos = opts.find( "rwgt_name" ); + if( namePos != REX::npos ){ + auto endName = opts.find_first_of( " \n\r\f\t\v", namePos ); + rwgtNames.push_back( std::string( opts.substr( namePos + 9, endName - namePos - 9 ) ) ); + } else { + rwgtNames.push_back( "rwgt_" + std::to_string( k + 1 ) ); + } + } else { + rwgtNames.push_back( "rwgt_" + std::to_string( k + 1 ) ); } - rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( lnchPos[lnchPos.size()-1], endLi - lnchPos[lnchPos.size()-1] ), parseOnline ) ); + rwgtRuns[ rwgtRuns.size() - 1 ].rwgtName = rwgtNames[ rwgtNames.size() - 1 ]; } rwgtProcs = std::vector(); rwgtProcs.reserve( rwgtRuns.size() ); - rwgtNames.reserve( rwgtRuns.size() ); - int p = 1; for( auto run : rwgtRuns ){ rwgtProcs.push_back( run.comRunProc() ); - if( run.rwgtName == "" ){ - rwgtNames.push_back( "rwgt_" + std::to_string( p++ ) ); - } else { - rwgtNames.push_back( std::string(run.rwgtName) ); - } } } 
rwgtCard::rwgtCard( std::string_view reweight_card ){ @@ -315,7 +359,7 @@ namespace REX::teaw void rwgtCollection::setSkeleton( std::vector& evSets ){ if( lheFile == nullptr || rwgtSets == nullptr || slhaParameters == nullptr ) throw std::runtime_error( "One or more of the necessary files (SLHA parameter card, LHE event storage file, and MadGraph-format reweight card) have not been initialised." ); - this->lheSkeleton = transSkel( lheFile, evSets ); + this->lheSkeleton = transSkel( this->lheFile, evSets ); this->skeleton = true; } void rwgtCollection::setDoublesFromSkeleton(){ @@ -520,10 +564,10 @@ namespace REX::teaw } else{ std::vector>> nuMEs = {}; - // for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) - // { - // nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); - // } + for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) + { + nuMEs.push_back(meVec[k]( *(momenta[k]), *(gS[k]) )); + } std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); newWGTs = REX::vecElemMult( *newMEs, *normWgt ); } From 4b9b82ebaaa543dc3e7ec65f5583f039728ad503 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Fri, 9 Aug 2024 16:42:40 +0200 Subject: [PATCH 20/76] fixed handling for amps with multiple parton sets, now treats them properly --- .../iolibs/template_files/gpu/Bridge.h | 6 +- .../gpu/MatrixElementKernels.cc | 15 ++--- .../iolibs/template_files/gpu/process_cc.inc | 2 +- tools/REX/REX.cc | 14 +++++ tools/REX/REX.h | 2 + tools/REX/teawREX.cc | 55 ++++++++++++++++++- tools/REX/teawREX.h | 8 +++ 7 files changed, 88 insertions(+), 14 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h index b030fc65f2..1529b0c92a 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h +++ 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h @@ -250,11 +250,11 @@ namespace mg5amcCpu throw std::logic_error( "Bridge constructor: FIXME! cannot choose gputhreads" ); // this should never happen! m_gpublocks = m_nevt / m_gputhreads; } - std::cout << "WARNING! Instantiate device Bridge (nevt=" << m_nevt << ", gpublocks=" << m_gpublocks << ", gputhreads=" << m_gputhreads - << ", gpublocks*gputhreads=" << m_gpublocks * m_gputhreads << ")" << std::endl; + //std::cout << "WARNING! Instantiate device Bridge (nevt=" << m_nevt << ", gpublocks=" << m_gpublocks << ", gputhreads=" << m_gputhreads + // << ", gpublocks*gputhreads=" << m_gpublocks * m_gputhreads << ")" << std::endl; m_pmek.reset( new MatrixElementKernelDevice( m_devMomentaC, m_devGs, m_devRndHel, m_devRndCol, m_devMEs, m_devSelHel, m_devSelCol, m_gpublocks, m_gputhreads ) ); #else - std::cout << "WARNING! Instantiate host Bridge (nevt=" << m_nevt << ")" << std::endl; + //std::cout << "WARNING! 
Instantiate host Bridge (nevt=" << m_nevt << ")" << std::endl; m_pmek.reset( new MatrixElementKernelHost( m_hstMomentaC, m_hstGs, m_hstRndHel, m_hstRndCol, m_hstMEs, m_hstSelHel, m_hstSelCol, m_nevt ) ); #endif // MGONGPUCPP_GPUIMPL // Create a process object, read param card and set parameters diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc index b73dfab583..f91ff393ab 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc @@ -35,9 +35,7 @@ namespace mg5amcCpu if( std::fetestexcept( FE_OVERFLOW ) ) fpes += " FE_OVERFLOW"; if( std::fetestexcept( FE_UNDERFLOW ) ) fpes += " FE_UNDERFLOW"; //if( std::fetestexcept( FE_INEXACT ) ) fpes += " FE_INEXACT"; // do not print this out: this would almost always signal! - if( fpes == "" ) - std::cout << "INFO: No Floating Point Exceptions have been reported" << std::endl; - else + if( fpes != "" ) std::cerr << "INFO: The following Floating Point Exceptions have been reported:" << fpes << std::endl; } @@ -163,15 +161,14 @@ namespace mg5amcCpu #endif if( verbose ) { - if( tag == "none" ) - std::cout << "INFO: The application does not require the host to support any AVX feature" << std::endl; - else if( ok && known ) - std::cout << "INFO: The application is built for " << tag << " and the host supports it" << std::endl; - else if( ok ) + if( tag != "none" ){ + //std::cout << "INFO: The application does not require the host to support any AVX feature" << std::endl; + if( ok && !known ) std::cout << "WARNING: The application is built for " << tag << " but it is unknown if the host supports it" << std::endl; - else + else if ( !ok && known ) std::cout << "ERROR! 
The application is built for " << tag << " but the host does not support it" << std::endl; } + } return ok; } diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_cc.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_cc.inc index 2518502d81..93b0c14f9f 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_cc.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_cc.inc @@ -64,7 +64,7 @@ fpeEnable() constexpr bool enableFPE = true; // this is hardcoded and no longer controlled by getenv( "CUDACPP_RUNTIME_ENABLEFPE" ) if( enableFPE ) { - std::cout << "INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW" << std::endl; + //std::cout << "INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW" << std::endl; feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW ); // new strategy #831 (do not enable FE_UNDERFLOW) //fpes = fegetexcept(); //std::cout << "fpeEnable: analyse fegetexcept()=" << fpes << std::endl; diff --git a/tools/REX/REX.cc b/tools/REX/REX.cc index 4b193c39fb..0d3f14bb1f 100644 --- a/tools/REX/REX.cc +++ b/tools/REX/REX.cc @@ -514,6 +514,11 @@ namespace REX end = structure.getEnd(); size_t trueStart = xmlFile.find_first_not_of("< \n\r\f\t\v", start); name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ \n\r\f\t\v", trueStart) - trueStart ); + auto possTags = xmlFile.substr(trueStart + name.size(), structure.getContStart() - trueStart - name.size() ); + if( possTags.find("=") != npos ){ + size_t eqSgn = possTags.find("="); + while( eqSgn < possTags.size() ){ tags.push_back( xmlTagParser( possTags, eqSgn ) ); } + } content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); for( auto& child 
: *(structure.getChildren()) ){ children.push_back( std::make_shared( *child ) ); @@ -3306,6 +3311,11 @@ namespace REX evtsData = prtInfo(lheFile, nPrt, statVec, sorter); process = lheFile[0]; } + transMonoLHE::transMonoLHE( const transMonoLHE& lheFile ){ + evtsHead = lheFile.evtsHead; + evtsData = lheFile.evtsData; + process = lheFile.process; + } // ZW: transposed LHE file ordered by subprocess transLHE::transLHE(){ return; } @@ -3369,6 +3379,10 @@ namespace REX subProcs[k] = std::make_shared( skeleton.procSets[k], skeleton.procSets[k].at(0)->getNprt() ); } } + transLHE::transLHE( const transLHE& lheFile ){ + relProcs = lheFile.relProcs; + subProcs = lheFile.subProcs; + } // template std::shared_ptr> transLHE::vectorFlat( std::vector>> vecVec ) { diff --git a/tools/REX/REX.h b/tools/REX/REX.h index bc3312d293..dfea597d2f 100644 --- a/tools/REX/REX.h +++ b/tools/REX/REX.h @@ -788,6 +788,7 @@ namespace REX transMonoLHE( const std::vector> lheFile, const int nPrt, statSort sorter, std::vector statVec = { "-1", "1" } ); + transMonoLHE( const transMonoLHE& lheFile ); }; struct transLHE { @@ -806,6 +807,7 @@ namespace REX const std::vector& statVec = { "-1", "1" } ); transLHE( lheNode& lheFile, const std::vector& statVec ); transLHE( transSkel& skeleton ); + transLHE( const transLHE& lheFile ); std::shared_ptr> vectorFlat( std::vector>> vecVec ); }; diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc index 3b762eaa21..5eb105f467 100644 --- a/tools/REX/teawREX.cc +++ b/tools/REX/teawREX.cc @@ -329,6 +329,19 @@ namespace REX::teaw setSlha( slha ); setRwgt( rwgts ); } + rwgtCollection::rwgtCollection( const rwgtCollection& rwgts ){ + rwgtSets = rwgts.rwgtSets; + slhaParameters = rwgts.slhaParameters; + lheFile = rwgts.lheFile; + wgts = rwgts.wgts; + gS = rwgts.gS; + momenta = rwgts.momenta; + lheFileSet = rwgts.lheFileSet; + slhaSet = rwgts.slhaSet; + rwgtSet = rwgts.rwgtSet; + skeleton = rwgts.skeleton; + eventFile = rwgts.eventFile; + } REX::transSkel& 
rwgtCollection::getSkeleton(){ if( !this->skeleton ) throw std::runtime_error( "Skeleton has not been set." ); @@ -343,6 +356,11 @@ namespace REX::teaw void rwgtCollection::setDoubles(Args&&... args){ if( lheFile == nullptr || rwgtSets == nullptr || slhaParameters == nullptr ) throw std::runtime_error( "One or more of the necessary files (SLHA parameter card, LHE event storage file, and MadGraph-format reweight card) have not been initialised." ); + if( this->doublesSet ){ return; } + if( this->skeleton ){ + this->setDoublesFromSkeleton(); + return; + } REX::lheRetDs returnBools; returnBools.xwgtup = true; returnBools.aqcdup = true; returnBools.pup = true; eventFile = REX::transLHE( *lheFile, args... ); auto vecOfVecs = REX::lheValDoubles( eventFile, returnBools ); @@ -355,16 +373,18 @@ namespace REX::teaw gS.push_back( vecOfVecs->at( 3*k + 1 ) ); momenta.push_back( vecOfVecs->at( 3*k + 2 ) ); } + this->doublesSet = true; } void rwgtCollection::setSkeleton( std::vector& evSets ){ if( lheFile == nullptr || rwgtSets == nullptr || slhaParameters == nullptr ) throw std::runtime_error( "One or more of the necessary files (SLHA parameter card, LHE event storage file, and MadGraph-format reweight card) have not been initialised." ); - this->lheSkeleton = transSkel( this->lheFile, evSets ); + this->lheSkeleton = REX::transSkel( this->lheFile, evSets ); this->skeleton = true; } void rwgtCollection::setDoublesFromSkeleton(){ if( !this->skeleton ) throw std::runtime_error( "Skeleton has not been set." 
); + if( this->doublesSet ){ return; } REX::lheRetDs returnBools; returnBools.xwgtup = true; returnBools.aqcdup = true; returnBools.pup = true; this->eventFile = REX::transLHE( this->lheSkeleton ); auto vecOfVecs = REX::lheValDoubles( eventFile, returnBools ); @@ -376,8 +396,12 @@ namespace REX::teaw gS.push_back( vecOfVecs->at( 3*k + 1 ) ); momenta.push_back( vecOfVecs->at( 3*k + 2 ) ); } + this->doublesSet = true; } + bool rwgtFiles::rwgtPulled(){ return (rewgtCard != nullptr); } + bool rwgtFiles::slhaPulled(){ return (slhaCard != nullptr); } + bool rwgtFiles::lhePulled(){ return (lheCard != nullptr); } void rwgtFiles::setRwgtPath( std::string_view path ){ rwgtPath = path; } void rwgtFiles::setSlhaPath( std::string_view path ){ slhaPath = path; } void rwgtFiles::setLhePath( std::string_view path ){ lhePath = path; } @@ -387,17 +411,29 @@ namespace REX::teaw setSlhaPath( slha_card ); setLhePath( lhe_card ); } + rwgtFiles::rwgtFiles( const rwgtFiles& rwgts ) : rwgtCollection( rwgts ){ + rwgtPath = rwgts.rwgtPath; + slhaPath = rwgts.slhaPath; + lhePath = rwgts.lhePath; + rewgtCard = rwgts.rewgtCard; + slhaCard = rwgts.slhaCard; + lheCard = rwgts.lheCard; + initialised = rwgts.initialised; + } REX::transSkel& rwgtFiles::initCards( std::vector& evSets ){ + if( initialised ){ return getSkeleton( evSets ); } if( rwgtPath == "" || slhaPath == "" || lhePath == "" ) throw std::runtime_error( "Paths to reweight card, parameter card, or LHE file have not been set" ); this->pullRwgt(); this->pullSlha(); this->pullLhe(); this->setLhe( *lheCard ); this->setSlha( std::make_shared( *slhaCard ) ); this->setRwgt( std::make_shared( *rewgtCard, *slhaParameters, true ) ); + this->initialised = true; return this->getSkeleton( evSets ); } template void rwgtFiles::initCards(Args&&... 
args){ + if( initialised ){ return; } if( rwgtPath == "" || slhaPath == "" || lhePath == "" ) throw std::runtime_error( "Paths to reweight card, parameter card, or LHE file have not been set" ); pullRwgt(); pullSlha(); pullLhe(); @@ -405,6 +441,7 @@ namespace REX::teaw setSlha( std::make_shared( *slhaCard ) ); setRwgt( std::make_shared( *rewgtCard, *slhaParameters, true ) ); setDoubles(args...); + initialised = true; } template void rwgtFiles::initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, Args&&... args ){ @@ -412,6 +449,7 @@ namespace REX::teaw setSlhaPath( slha_card ); setRwgtPath( reweight_card ); initCards(args...); + initialised = true; } void rwgtFiles::initDoubles(){ if( !this->skeleton ) @@ -419,12 +457,15 @@ namespace REX::teaw this->setDoublesFromSkeleton(); } void rwgtFiles::pullRwgt(){ + if( this->rwgtPulled() ){ return; } rewgtCard = REX::filePuller( rwgtPath ); } void rwgtFiles::pullSlha(){ + if( this->slhaPulled() ){ return; } slhaCard = REX::filePuller( slhaPath ); } void rwgtFiles::pullLhe(){ + if( this->lhePulled() ){ return; } lheCard = REX::filePuller( lhePath ); } @@ -455,6 +496,18 @@ namespace REX::teaw meEval = meCalc; meInit = true; } + rwgtRunner::rwgtRunner( const rwgtRunner& rwgts ) : rwgtFiles( rwgts ){ + this->meInit = rwgts.meInit; + this->meCompInit = rwgts.meCompInit; + this->meSet = rwgts.meSet; + this->normWgtSet = rwgts.normWgtSet; + this->meEval = rwgts.meEval; + this->meVec = rwgts.meVec; + this->initMEs = rwgts.initMEs; + this->meNormWgts = rwgts.meNormWgts; + this->normWgt = rwgts.normWgt; + this->rwgtGroup = rwgts.rwgtGroup; + } // rwgtRunner::rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, // ampCall meCalcs ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ // meEvals = meCalcs; diff --git a/tools/REX/teawREX.h b/tools/REX/teawREX.h index 7c1120c5b9..326d15de44 100644 --- a/tools/REX/teawREX.h +++ b/tools/REX/teawREX.h 
@@ -102,6 +102,7 @@ namespace REX::teaw REX::transSkel& getSkeleton( std::vector& evSets ); rwgtCollection(); rwgtCollection( std::shared_ptr lhe, std::shared_ptr slha, std::shared_ptr rwgts ); + rwgtCollection( const rwgtCollection& rwgts ); protected: template void setDoubles(Args&&... args); @@ -117,6 +118,7 @@ namespace REX::teaw bool slhaSet = false; bool rwgtSet = false; bool skeleton = false; + bool doublesSet = false; REX::transLHE eventFile; REX::transSkel lheSkeleton; }; @@ -127,6 +129,7 @@ namespace REX::teaw void setLhePath( std::string_view path ); rwgtFiles(); rwgtFiles( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ); + rwgtFiles( const rwgtFiles& rwgts ); REX::transSkel& initCards( std::vector& evSets); void initDoubles(); template @@ -134,9 +137,13 @@ namespace REX::teaw template void initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, Args&&... args ); protected: + bool rwgtPulled(); + bool slhaPulled(); + bool lhePulled(); void pullRwgt(); void pullSlha(); void pullLhe(); + bool initialised = false; std::string rwgtPath; std::string lhePath; std::string slhaPath; @@ -157,6 +164,7 @@ namespace REX::teaw rwgtRunner( rwgtFiles& rwgts, std::vector& meCalcs ); rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, amplitude meCalc ); + rwgtRunner(const rwgtRunner& rwgts); //rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, //ampCall meCalcs ); bool oneME(); From 12ae2f86296da4cc5ecba3fd0f730931676bf79c Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 9 Sep 2024 11:18:58 +0200 Subject: [PATCH 21/76] lots of bugfixes with indexing and memory management --- .../iolibs/template_files/gpu/Bridge.h | 4 +- .../template_files/gpu/MatrixElementKernels.h | 4 +- .../template_files/gpu/cudacpp_driver.mk | 14 +- .../gpu/process_function_definitions.inc | 2 +- 
.../CUDACPP_SA_OUTPUT/model_handling.py | 1 + tools/REX/REX.cc | 157 +++++++++++------- tools/REX/REX.h | 15 +- tools/REX/rwgt_driver.cc | 46 +++-- tools/REX/rwgt_instance.cc | 5 +- tools/REX/rwgt_runner.cc | 42 +++-- tools/REX/rwgt_runner.h | 2 +- tools/REX/teawREX.cc | 132 ++++++++++++--- tools/REX/teawREX.h | 16 +- 13 files changed, 316 insertions(+), 124 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h index 1529b0c92a..0bff5b5bf2 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h @@ -342,7 +342,7 @@ namespace mg5amcCpu if( goodHelOnly ) return; m_pmek->computeMatrixElements( channelId ); copyHostFromDevice( m_hstMEs, m_devMEs ); - flagAbnormalMEs( m_hstMEs.data(), m_nevt ); + //flagAbnormalMEs( m_hstMEs.data(), m_nevt ); copyHostFromDevice( m_hstSelHel, m_devSelHel ); copyHostFromDevice( m_hstSelCol, m_devSelCol ); if constexpr( std::is_same_v ) @@ -392,7 +392,7 @@ namespace mg5amcCpu } if( goodHelOnly ) return; m_pmek->computeMatrixElements( channelId ); - flagAbnormalMEs( m_hstMEs.data(), m_nevt ); + //flagAbnormalMEs( m_hstMEs.data(), m_nevt ); if constexpr( std::is_same_v ) { memcpy( mes, m_hstMEs.data(), m_hstMEs.bytes() ); diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h index 9256dabeac..d60325d17e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h @@ -101,7 +101,7 @@ namespace mg5amcCpu const 
size_t nevt ); // Destructor - virtual ~MatrixElementKernelHost() { MatrixElementKernelBase::dumpSignallingFPEs(); } + virtual ~MatrixElementKernelHost() { }//MatrixElementKernelBase::dumpSignallingFPEs(); } // Compute good helicities (returns nGoodHel, the number of good helicity combinations out of ncomb) int computeGoodHelicities() override final; @@ -153,7 +153,7 @@ namespace mg5amcCpu const size_t gputhreads ); // Destructor - virtual ~MatrixElementKernelDevice() { MatrixElementKernelBase::dumpSignallingFPEs(); } + virtual ~MatrixElementKernelDevice() { }//MatrixElementKernelBase::dumpSignallingFPEs(); } // Reset gpublocks and gputhreads void setGrid( const int gpublocks, const int gputhreads ); diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk index 867da1fdb1..e7ce3052d5 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk @@ -615,14 +615,14 @@ override RUNTIME = DIRS := $(wildcard P*) # Construct the library paths -cxx_proclibs := $(shell for dir in $(DIRS); do basename $$dir | awk -F_ '{print "mg5amc_"$$(NF-1)"_"$$NF"_cpp"}'; done) -gpu_proclibs := $(shell for dir in $(DIRS); do basename $$dir | awk -F_ '{print "mg5amc_"$$(NF-1)"_"$$NF"_$(GPUSUFFIX)"}'; done) +cxx_proclibs := $(shell for dir in $(DIRS); do basename $$dir | awk -F_ '{print "-l mg5amc_"$$(NF-1)"_"$$NF"_cpp"}'; done) +gpu_proclibs := $(shell for dir in $(DIRS); do basename $$dir | awk -F_ '{print "-l mg5amc_"$$(NF-1)"_"$$NF"_$(GPUSUFFIX)"}'; done) ifeq ($(GPUCC),) cxx_rwgt=$(BUILDDIR)/rwgt_driver_cpp.exe rwgtlib := $(addprefix ,$(addsuffix /librwgt_cpp.so,$(DIRS))) else - gpu_rwgt=$(BUILDDIR)/rwgt_driver_$(GPUSUFFIX).exe + gpu_rwgt=$(BUILDDIR)/rwgt_driver_gpu.exe 
rwgtlib := $(addprefix ,$(addsuffix /librwgt_$(GPUSUFFIX).so,$(DIRS))) endif @@ -662,7 +662,7 @@ endif # $(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) # $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/rwgt_driver_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -$(BUILDDIR)/rwgt_driver_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) +$(BUILDDIR)/rwgt_driver_gpu.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) # # Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o # $(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) @@ -772,7 +772,7 @@ $(rwgtlib): # Target (and build rules): C++ and CUDA/HIP standalone executables $(cxx_rwgt): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cxx_rwgt): $(BUILDDIR)/rwgt_driver.o $(rwgtlib) - $(CXX) -o $@ $(BUILDDIR)/rwgt_driver.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_proclibs) $(rwgtlib) + $(CXX) -o $@ $(BUILDDIR)/rwgt_driver.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) $(cxx_proclibs) $(rwgtlib) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) @@ -782,8 +782,8 @@ else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 $(gpu_rwgt): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif $(gpu_rwgt): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_rwgt): $(BUILDDIR)/$(BUILDDIR)/rwgt_driver.o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(DIRS) - $(GPUCC) -o $@ $(BUILDDIR)/rwgt_driver.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) -l$(gpu_proclibs) $(rwgtlib) +$(gpu_rwgt): $(BUILDDIR)/$(BUILDDIR)/rwgt_driver.o $(rwgtlib) + $(GPUCC) -o $@ $(BUILDDIR)/rwgt_driver.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(gpu_proclibs) $(rwgtlib) endif #------------------------------------------------------------------------------- diff --git 
a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc index 71e99c6ab7..a4d4201f36 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc @@ -117,7 +117,7 @@ namespace mg5amcCpu #else memcpy( cHel, tHel, ncomb * npar * sizeof( short ) ); #endif - fpeEnable(); // enable SIGFPE traps for Floating Point Exceptions + //fpeEnable(); // enable SIGFPE traps for Floating Point Exceptions } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 2287a58b84..adf77b290a 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -2206,6 +2206,7 @@ def edit_rwgt_runner(self): replace_dict['init_prt_ids'] = self.get_init_prts_vecs(self.matrix_elements[0].get('processes')) replace_dict['fin_prt_ids'] = self.get_fin_prts_vecs(self.matrix_elements[0].get('processes')) replace_dict['process_events'] = self.get_rwgt_legs_vec(self.matrix_elements[0].get('processes')) + replace_dict['no_events'] = len(self.matrix_elements[0].get('processes')) template = open(pjoin(self.template_path,'REX', 'rwgt_runner.inc'),'r').read() ff = open(pjoin(self.path, 'rwgt_runner.cc'),'w') ff.write(template % replace_dict) diff --git a/tools/REX/REX.cc b/tools/REX/REX.cc index 0d3f14bb1f..c445493f6e 100644 --- a/tools/REX/REX.cc +++ b/tools/REX/REX.cc @@ -100,10 +100,10 @@ namespace REX if (it != indexMap.end() && !it->second.empty()) { order->at(pos) = 
(it->second.front()); it->second.pop(); - } //else { + } else { // Element in vec2 not found in vec1 - // order->at(pos) = npos; - //} + order->at(pos) = npos; + } ++pos; } @@ -143,27 +143,27 @@ namespace REX // ZW: fcn for splitting a string into a vector of strings, // each element differentiated by linebreaks in the original string - // Removes sequential linebreaks, ie "\n\n\n" would - // only result in a single element separation + // Removes sequential linebreaks, as well as leading blankspace std::shared_ptr> nuLineSplitter( std::string_view currEvt ) { auto lineBreaks = nuFindEach( currEvt, "\n" ); - std::vector trueBreaks; - trueBreaks.reserve( lineBreaks->size() ); + auto splitLines = std::make_shared>(); + if( lineBreaks->at(0) == npos ){ splitLines->push_back( currEvt ); return splitLines; } + splitLines->reserve( lineBreaks->size() ); + auto strtLine = currEvt.substr( 0, lineBreaks->at(0) ); + if( strtLine.size() > 0 ){ splitLines->push_back( strtLine ); } for( size_t k = 0 ; k < lineBreaks->size() - 1 ; ++k ) { - if( int( (*lineBreaks)[k+1] - (*lineBreaks)[k]) == 1){continue;} - trueBreaks.push_back( (*lineBreaks)[k] ); + auto strtPos = currEvt.substr( lineBreaks->at(k), lineBreaks->at(k+1) - lineBreaks->at(k) ).find_first_not_of(" \n\r\f\t\v"); + if( strtPos == npos ){ continue; } + splitLines->push_back( currEvt.substr( lineBreaks->at(k) + 1, lineBreaks->at(k+1) - lineBreaks->at(k) - 1 ) ); } - auto splitLines = std::make_shared>(); - splitLines->reserve( trueBreaks.size() ); - size_t startPos = 0; - for( auto k : trueBreaks ) + size_t nuStrtPs = currEvt.substr( lineBreaks->at( lineBreaks->size() - 1 ) ).find_first_not_of(" \n\r\f\t\v"); + if( nuStrtPs != npos ) { - splitLines->push_back( currEvt.substr( startPos + 1, k - startPos - 1) ); - startPos = k; + size_t endPs = currEvt.find_last_not_of(" \n\r\f\t\v"); + splitLines->push_back( currEvt.substr( nuStrtPs, endPs - nuStrtPs ) ); } - if( currEvt.substr( startPos ).size() > 1 ){ 
splitLines->push_back( currEvt.substr( startPos ) ); } return splitLines; } @@ -217,17 +217,15 @@ namespace REX // Ignores sequential blankspaces of all forms std::shared_ptr> nuBlankSplitter( std::string_view currEvt ) { - auto lines = nuLineSplitter( currEvt ); + auto strtPos = currEvt.find_first_not_of(" \n\r\f\t\v"); auto splitString = std::make_shared>(); - splitString->reserve( lines->size() * lines->at(0).size() ); - for( auto line : *lines ) + if( strtPos == npos ){ splitString->push_back( currEvt ); return splitString; } + auto endPos = currEvt.find_first_of(" \n\r\f\t\v", strtPos); + while( strtPos != npos ) { - auto words = nuWordSplitter(line); - for( auto word : *words ) - { - if( word == "" || word == "\n" || word == " " ){continue;} - splitString->push_back( word ); - } + splitString->push_back( currEvt.substr( strtPos, endPos - strtPos ) ); + strtPos = currEvt.find_first_not_of(" \n\r\f\t\v", endPos); + endPos = currEvt.find_first_of(" \n\r\f\t\v", strtPos); } return splitString; } @@ -524,6 +522,26 @@ namespace REX children.push_back( std::make_shared( *child ) ); } } + xmlNode::xmlNode( const xmlNode& original ){ + this->nodeHeader = original.nodeHeader; + this->nodeContent = original.nodeContent; + this->nodeEnd = original.nodeEnd; + this->structure = original.structure; + this->children = original.children; + this->tags = original.tags; + this->writtenSelf = original.writtenSelf; + this->deepMod = original.deepMod; + this->xmlFile = original.xmlFile; + this->name = original.name; + this->content = original.content; + this->start = original.start; + this->end = original.end; + this->modded = original.modded; + this->written = original.written; + this->parsed = original.parsed; + this->deepParsed = original.deepParsed; + this->faux = original.faux; + } std::vector> xmlNode::getChildren(){ return children; } std::vector> xmlNode::getTags(){ return tags; } std::string_view xmlNode::getFile(){ return xmlFile; } @@ -1419,6 +1437,8 @@ namespace REX 
bool event::initProcMap(bool hard) { if(!hard){ if( procMap.size() > 0 ){ return true; } } + procMap.clear(); + procOrder.clear(); for( auto prt : prts ){ procMap.insert({prt->getStatus(), std::vector()}); procOrder.insert({prt->getStatus(), std::vector()}); @@ -1435,6 +1455,8 @@ namespace REX bool event::initProcMap( sortFcn sorter, bool hard ) { if(!hard){ if( procMap.size() > 0 ){ return true; } } + procMap.clear(); + procOrder.clear(); specSorted = false; eventSort = sorter; for( auto prt : prts ){ @@ -1453,6 +1475,8 @@ namespace REX bool event::initProcMap( statSort sorter, bool hard ) { if(!hard){ if( procMap.size() > 0 ){ return true; } } + procMap.clear(); + procOrder.clear(); specSorted = true; specSort = sorter; for( auto prt : prts ){ @@ -1503,6 +1527,7 @@ namespace REX } void event::contWriter() { nodeContent = "\n" + *header.getContent(); + if( nodeContent.back() != '\n' ){ nodeContent += "\n"; } for( auto prt : prts ){ nodeContent += *prt->getContent(); if( nodeContent.back() != '\n' ){ nodeContent += "\n"; } @@ -1562,12 +1587,12 @@ namespace REX if( addedWgt ){ appendWgts(); } return writtenSelf; } - std::map> &event::getProc(){ - if( initProcMap() ){ return procMap; } + std::map> &event::getProc( bool hard ){ + if( initProcMap(hard) ){ return procMap; } else throw std::runtime_error("Error while parsing event node."); } - std::map> &event::getProcOrder(){ - if( initProcMap() ){ return procOrder; } + std::map> &event::getProcOrder( bool hard ){ + if( initProcMap(hard) ){ return procOrder; } else throw std::runtime_error("Error while parsing event node."); } std::map> event::getProc() const { @@ -1578,20 +1603,20 @@ namespace REX if ( hasBeenProc ){ return procOrder; } else throw std::runtime_error("Const declaration of event node before it has been procesed."); } - std::map> &event::getProc(sortFcn sorter){ - if( initProcMap(sorter) ){ return procMap; } + std::map> &event::getProc(sortFcn sorter, bool hard){ + if( initProcMap(sorter, hard) ){ return 
procMap; } else throw std::runtime_error("Error while parsing event node."); } - std::map> &event::getProcOrder(sortFcn sorter){ - if( initProcMap(sorter) ){ return procOrder; } + std::map> &event::getProcOrder(sortFcn sorter, bool hard){ + if( initProcMap(sorter, hard) ){ return procOrder; } else throw std::runtime_error("Error while parsing event node."); } - std::map> &event::getProc(statSort sorter){ - if( initProcMap(sorter) ){ return procMap; } + std::map> &event::getProc(statSort sorter, bool hard){ + if( initProcMap(sorter, hard) ){ return procMap; } else throw std::runtime_error("Error while parsing event node."); } - std::map> &event::getProcOrder(statSort sorter){ - if( initProcMap(sorter) ){ return procOrder; } + std::map> &event::getProcOrder(statSort sorter, bool hard){ + if( initProcMap(sorter, hard) ){ return procOrder; } else throw std::runtime_error("Error while parsing event node."); } @@ -1905,7 +1930,8 @@ namespace REX std::shared_ptr lesHouchesCard::selfWrite(){ auto writeCard = std::make_shared(header); if( isMod() ) - { for( auto block : blocks ) + { + for( auto block : blocks ) { *writeCard += *block.selfWrite(); } *writeCard += *decays.selfWrite(); } else{ @@ -1975,40 +2001,33 @@ namespace REX initNode::initNode() : xmlNode(){ name = "init"; } initNode::initNode( const std::string_view originFile, const size_t& begin, bool parseOnline ) : xmlNode( originFile, begin ){ - content = originFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } initNode::initNode( xmlNode& node, bool parseOnline ) : xmlNode( node ){ - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } initNode::initNode( xmlNode* node, bool parseOnline ) : xmlNode( *node ){ - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ 
parse( parseOnline ); } } initNode::initNode( std::shared_ptr node, bool parseOnline ) : xmlNode( *node ){ - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } initNode::initNode( xmlTree tree, bool parseOnline ) : xmlNode( tree ){ - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } initNode::initNode( std::shared_ptr tree, bool parseOnline ) : xmlNode( *tree ){ - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } initNode::initNode( xmlTree* tree, bool parseOnline ) : xmlNode( *tree ){ - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); if( parseOnline ){ parse( parseOnline ); } } bool initNode::parseContent(){ if( content.size() == 0 ){ return false; } - auto linebreaks = lineFinder( content ); - if( linebreaks->size() == 0 ){ return false; } - initHead = std::make_shared(content.substr( 0, linebreaks->at(0) ) ); - for( size_t k = 0 ; k < linebreaks->size() - 1 ; ++k ){ - initLines.push_back( std::make_shared( content.substr( linebreaks->at(k), linebreaks->at(k+1) - linebreaks->at(k) ) ) ); + auto lines = nuLineSplitter( content ); + if( lines->size() == 0 ){ return false; } + initHead = std::make_shared(lines->at(0) ); + for( size_t k = 1 ; k < lines->size() ; ++k ){ + initLines.push_back( std::make_shared( lines->at(k) ) ); } return true; } @@ -2243,7 +2262,7 @@ namespace REX //if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} for( auto child : children ){ if( child->getName() == "header" ){ header = std::make_shared( *child ); continue; } - if( child->getName() == "init" ){ init = std::make_shared( *child ); continue; } + if( child->getName() == 
"init" ){ init = std::make_shared( *child, true ); continue; } if( child->getName() == "event" ){ events.push_back( std::make_shared( *child ) ); continue; } } } @@ -3323,6 +3342,7 @@ namespace REX { procSets = evProcessPull( lheFile ); relProcs = evProcOrder( lheFile, procSets ); + this->setRelEvSets(); xmlFile = lheFile.getFile(); auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs ); subProcs = std::vector>( procsOrdered.size() ); @@ -3337,6 +3357,7 @@ namespace REX { procSets = evProcessPull( lheFile, sorter, statVec ); relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + this->setRelEvSets(); xmlFile = lheFile.getFile(); auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, sorter, statVec ); subProcs = std::vector>( procsOrdered.size() ); @@ -3351,6 +3372,7 @@ namespace REX { procSets = evProcessPull( lheFile, sorter, statVec ); relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); + this->setRelEvSets(); xmlFile = lheFile.getFile(); auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, sorter, statVec ); subProcs = std::vector>( procsOrdered.size() ); @@ -3363,6 +3385,7 @@ namespace REX { procSets = evProcessPull( lheFile, statVec ); relProcs = evProcOrder( lheFile, procSets, statVec ); + this->setRelEvSets(); xmlFile = lheFile.getFile(); auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, statVec ); subProcs = std::vector>( procsOrdered.size() ); @@ -3374,6 +3397,7 @@ namespace REX transLHE::transLHE( transSkel& skeleton ){ relProcs = skeleton.relProcs; subProcs = std::vector>( skeleton.procSets.size() ); + relEvSets = skeleton.relEvSet; for( size_t k = 0 ; k < skeleton.procSets.size() ; ++k ) { subProcs[k] = std::make_shared( skeleton.procSets[k], skeleton.procSets[k].at(0)->getNprt() ); @@ -3383,20 +3407,39 @@ namespace REX relProcs = lheFile.relProcs; subProcs = lheFile.subProcs; } + void transLHE::setRelEvSets(){ + relEvSets = std::vector(relProcs.size(), false); + for ( size_t k = 0 ; k < 
this->relProcs.size() ; ++k ) + { + if( std::find(this->relProcs[k]->begin(), this->relProcs[k]->end(), true) != this->relProcs[k]->end() ) + { + this->relEvSets[k] = true; + } + } + } // template std::shared_ptr> transLHE::vectorFlat( std::vector>> vecVec ) { - if( vecVec.size() != relProcs.size() ) throw std::range_error("vectorFlat: input vector size does not match number of subprocesses"); + bool allRel = (vecVec.size() == relProcs.size()); + bool justRel = (vecVec.size() == std::count(relEvSets.begin(), relEvSets.end(), true)); + std::vector relInds; + if( !(allRel || justRel) ) throw std::range_error("vectorFlat: input vector size does not match number of (relevant) subprocesses"); + for( size_t k = 0; k < relEvSets.size(); ++k ) + { + if( allRel ){ relInds.push_back(k); } + else if( relEvSets[k] ){ relInds.push_back(k); } + } + size_t totVec = 0; for( size_t k = 0 ; k < vecVec.size() ; ++k){ - if( vecVec[k]->size() == relProcs[k]->size() ) continue; - else throw std::range_error("vectorFlat: input vector size does not match number of events for subprocess"); + totVec += vecVec[k]->size(); } + if( totVec != relProcs[0]->size() ) throw std::range_error("vectorFlat: sum of input vector sizes does not match total number of events"); auto flatVec = std::make_shared>(relProcs[0]->size()); - for( size_t k = 0 ; k < relProcs.size() ; ++k ){ + for( size_t k = 0 ; k < relInds.size() ; ++k ){ size_t currInd = 0; for( size_t j = 0 ; j < relProcs[k]->size() ; ++j ){ - if( relProcs[k]->at(j) ){ - flatVec->at(currInd) = vecVec[k]->at(currInd); + if( relProcs[relInds[k]]->at(j) ){ + flatVec->at(j) = vecVec[k]->at(currInd); ++currInd; } } diff --git a/tools/REX/REX.h b/tools/REX/REX.h index dfea597d2f..4130f53a6f 100644 --- a/tools/REX/REX.h +++ b/tools/REX/REX.h @@ -110,6 +110,7 @@ namespace REX xmlNode(); xmlNode( const std::string_view originFile, const size_t& begin = 0, const std::vector>& childs = {} ); xmlNode( xmlTree &tree ); + xmlNode( const xmlNode& original ); 
std::vector> getChildren(); std::vector> getTags(); std::string_view getFile(); @@ -361,14 +362,14 @@ namespace REX public: std::shared_ptr nodeWriter() override; std::shared_ptr nodeWriter( bool recursive ); - std::map> &getProc(); - std::map> &getProcOrder(); + std::map> &getProc( bool hard = false ); + std::map> &getProcOrder( bool hard = false ); std::map> getProc() const; std::map> getProcOrder() const; - std::map> &getProc(sortFcn sorter); - std::map> &getProcOrder(sortFcn sorter); - std::map> &getProc(statSort sorter); - std::map> &getProcOrder(statSort sorter); + std::map> &getProc(sortFcn sorter, bool hard = true); + std::map> &getProcOrder(sortFcn sorter, bool hard = true); + std::map> &getProc(statSort sorter, bool hard = true); + std::map> &getProcOrder(statSort sorter, bool hard = true); }; using eventComparison = std::function&)>; @@ -797,6 +798,8 @@ namespace REX std::vector> subProcs; std::vector> procSets; std::vector>> relProcs; + std::vector relEvSets; + void setRelEvSets(); transLHE(); transLHE( lheNode& lheFile ); transLHE( lheNode& lheFile, diff --git a/tools/REX/rwgt_driver.cc b/tools/REX/rwgt_driver.cc index 77b0efd928..555d21c7ee 100644 --- a/tools/REX/rwgt_driver.cc +++ b/tools/REX/rwgt_driver.cc @@ -29,6 +29,22 @@ int usage( char* argv0, int ret = 1 ) return ret; } +void writeRwgtCsv( std::string path, std::shared_ptr> names, std::shared_ptr> xSecs, std::shared_ptr> errXSecs ) +{ + std::ofstream outFile; + outFile.open( path ); + if( !outFile.is_open() ) + throw std::runtime_error( "Failed to open output file for writing." ); + if( names->size() != xSecs->size() || names->size() != errXSecs->size() ) + throw std::runtime_error( "Mismatch in number of processes, cross-sections, and errors when logging results." 
); + //outFile << "Process, Cross-Section, Error\n"; + for( size_t k = 0 ; k < names->size() ; ++k ) + { + outFile << names->at(k) << ", " << xSecs->at(k) << ", " << errXSecs->at(k) << "\n"; + } + outFile.close(); + return; +} int main( int argc, char** argv ){ std::cout << "Starting reweighting driver...\n"; @@ -95,29 +111,25 @@ int main( int argc, char** argv ){ static REX::teaw::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); - static std::vector runSet = {%(run_set)s}; - // std::vector runSet; static REX::transSkel loadEvs = fileCol.initCards( runSet ); - fileCol.initDoubles(); - // static std::vector&, unsigned int )>> fBridgeConstr; static std::vector fBridgeVec = {%(fbridge_vec)s}; - static std::vector bridges; - static std::vector amps; - + size_t relSet = 0; for( size_t k = 0 ; k < runSet.size() ; ++k ){ if( !loadEvs.relEvSet[k] ){ continue; } - fBridgeVec[k].init( loadEvs.procSets[k], 32 ); + fBridgeVec[k].init( loadEvs.procSets[relSet], 32 ); bridges.push_back( fBridgeVec[k] ); - REX::teaw::amplitude currAmp = std::bind(&rwgt::fBridge::bridgeCall, &bridges.back(), std::placeholders::_1, std::placeholders::_2); + auto currAmp = [bridge = bridges[relSet]](std::vector& momenta, std::vector& alphaS) mutable { + return bridge.bridgeCall(momenta, alphaS); + }; amps.push_back( currAmp ); + ++relSet; } - // REX::teaw::ampCall subProcSet; // for( auto proc : runSet ){ @@ -129,9 +141,19 @@ int main( int argc, char** argv ){ //std::function>( std::vector&, std::vector& )> scatteringAmplitude = bridgeCont.scatAmp; REX::teaw::rwgtRunner driver( fileCol, amps ); - driver.runRwgt( outputPath ); - + + auto rwgt_names = driver.getNames(); + auto rwgt_xSecs = driver.getReXSecs(); + auto rwgt_errXSecs = driver.getReXErrs(); + // for( size_t k = 0 ; k < rwgt_names->size() ; ++k ) + // { + // std::cout << "Process: " << rwgt_names->at(k) << "\n"; + // std::cout << "Cross-Section: " << rwgt_xSecs->at(k) << " +/- " << rwgt_errXSecs->at(k) << "\n"; + // } + + 
writeRwgtCsv( "rwgt_results.csv", rwgt_names, rwgt_xSecs, rwgt_errXSecs ); + return 0; } \ No newline at end of file diff --git a/tools/REX/rwgt_instance.cc b/tools/REX/rwgt_instance.cc index eb8d05ae44..211414365d 100644 --- a/tools/REX/rwgt_instance.cc +++ b/tools/REX/rwgt_instance.cc @@ -28,11 +28,12 @@ namespace rwgt{ void warpPad( std::vector& input, unsigned int nWarp = 32 ){ auto nEvt = input.size(); auto nWarpRemain = warpRemain( nEvt, nWarp ); - auto fauxNEvt = nEvt + nWarpRemain; +// auto fauxNEvt = nEvt + nWarpRemain; // auto output = std::vector( fauxNEvt ); // std::copy( input.begin(), input.end(), output.begin()); // input.resize( fauxNEvt ); - for( size_t k = nEvt - nWarpRemain ; k < fauxNEvt ; ++k ){ + input.reserve( nEvt + nWarpRemain ); + for( size_t k = nEvt - nWarpRemain ; k < nEvt ; ++k ){ input.push_back( input[k] ); } return; diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index e0be856db4..e6909ca291 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -25,7 +25,7 @@ namespace %(process_namespace)s{ CppObjectInFortran *bridgeInst; auto evalScatAmps = std::make_shared>( nEvt ); fbridgecreate_( &bridgeInst, &nEvt, &nPar, &nMom ); - fbridgesequence_( &bridgeInst, &momenta.at(0), &alphaS.at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); + fbridgesequence_nomultichannel_( &bridgeInst, &momenta.at(0), &alphaS.at(0), &rndHel[0], &rndCol[0], &evalScatAmps->at(0), &selHel[0], &selCol[0] ); fbridgedelete_( &bridgeInst ); return evalScatAmps; } @@ -44,37 +44,57 @@ namespace %(process_namespace)s{ return constrBridge; } - std::shared_ptr> procSort( std::string_view status, std::vector arguments ){ + std::shared_ptr> procSort( std::string_view status, std::vector arguments, size_t index ){ std::vector> initPrts = {%(init_prt_ids)s}; std::vector> finPrts = {%(fin_prt_ids)s}; // std::vector initPrts = {"-1"}; // std::vector finPrts = {"1"}; std::shared_ptr> refOrder; - if( status == 
"-1" ){ + if( index == REX::npos ){ + if( status == "-1" ){ for( auto& prts : initPrts ){ refOrder = REX::getRefOrder( prts, arguments ); - if( refOrder->at(refOrder->size() - 1) != REX::npos ){ break; } + if( std::find(refOrder->begin(), refOrder->end(), REX::npos) == refOrder->end() ){ break; } } return refOrder; } else if( status == "1" ){ for( auto& prts : finPrts ){ refOrder = REX::getRefOrder( prts, arguments ); - if( refOrder->at(refOrder->size() - 1) != REX::npos ){ break; } + if( std::find(refOrder->begin(), refOrder->end(), REX::npos) == refOrder->end() ){ break; } } return refOrder; } return REX::stoiSort( arguments ); } + else{ + if( index >= initPrts.size() || index >= finPrts.size() ) throw std::runtime_error( "procSort called for out-of-bounds event." ); + if( status == "-1" ){ + refOrder = REX::getRefOrder( initPrts.at(index), arguments ); + return refOrder; + } + else if( status == "1" ){ + refOrder = REX::getRefOrder( finPrts.at(index), arguments ); + return refOrder; + } + return REX::stoiSort( arguments ); + } + } bool checkProc( REX::event& process, std::vector& relStats ){ - REX::statSort locSort = procSort; - auto order = process.getProcOrder( locSort ); - for( auto stat : relStats ){ - auto currPts = order.at( stat ); - if( currPts[currPts.size() - 1 ] == REX::npos ){ return false; } + size_t no_evts = %(no_events)s; + for( size_t k = 0 ; k < no_evts ; ++k ){ + REX::statSort locSort = [ind = k](std::string_view status, std::vector arguments){ + return procSort( status, arguments, ind ); + }; + auto order = process.getProcOrder( locSort ); + for( size_t j = 0 ; j < relStats.size() ; ++j ){ + auto currPts = order.at( relStats[j] ); + if( std::find(currPts.begin(), currPts.end(), REX::npos) != currPts.end() ){ break; } + if( j == relStats.size() - 1 ){ return true; } + } } - return true; + return false; } REX::eventSet eventSetConstr( std::vector& process ){ diff --git a/tools/REX/rwgt_runner.h b/tools/REX/rwgt_runner.h index 
4c493e504e..ebbe874ce0 100644 --- a/tools/REX/rwgt_runner.h +++ b/tools/REX/rwgt_runner.h @@ -22,7 +22,7 @@ namespace %(process_namespace)s { std::shared_ptr> amp( int& nEvt, int& nPar, int& nMom, std::vector& momenta, std::vector& alphaS, std::vector& rndHel, std::vector& rndCol, std::vector& selHel, std::vector& selCol, int& chanId ); rwgt::fBridge bridgeConstr( std::vector& process, unsigned int warpSize ); rwgt::fBridge bridgeConstr(); - std::shared_ptr> procSort( std::string_view status, std::vector arguments ); + std::shared_ptr> procSort( std::string_view status, std::vector arguments, size_t index = REX::npos ); bool checkProc( REX::event& process, std::vector& relStats ); REX::eventSet eventSetConstruct( std::vector& process ); REX::eventSet getEventSet(); diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc index 5eb105f467..8fcc1f8029 100644 --- a/tools/REX/teawREX.cc +++ b/tools/REX/teawREX.cc @@ -212,14 +212,14 @@ namespace REX::teaw // rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( lnchPos[lnchPos.size()-1], endLi - lnchPos[lnchPos.size()-1] ), parseOnline ) ); // } // rwgtProcs = std::vector(); rwgtProcs.reserve( rwgtRuns.size() ); - // rwgtNames.reserve( rwgtRuns.size() ); + // rwgtNames->reserve( rwgtRuns.size() ); // int p = 1; // for( auto run : rwgtRuns ){ // rwgtProcs.push_back( run.comRunProc() ); // if( run.rwgtName == "" ){ - // rwgtNames.push_back( "rwgt_" + std::to_string( p++ ) ); + // rwgtNames->push_back( "rwgt_" + std::to_string( p++ ) ); // } else { - // rwgtNames.push_back( std::string(run.rwgtName) ); + // rwgtNames->push_back( std::string(run.rwgtName) ); // } // } // } @@ -239,6 +239,8 @@ namespace REX::teaw if( line[line.find_first_not_of(" \n\r\f\t\v")] == '#' ){ continue; } opts.push_back( line ); } + rwgtNames = std::make_shared>(); + rwgtNames->reserve( lnchPos.size() - 1 ); for( size_t k = 0 ; k < lnchPos.size() - 1 ; ++k ){ auto setPos = srcCard.find( "set", lnchPos[k] ); if( setPos == REX::npos ){ continue; } 
@@ -251,14 +253,14 @@ namespace REX::teaw auto namePos = opts.find( "rwgt_name" ); if( namePos != REX::npos ){ auto endName = opts.find_first_of( " \n\r\f\t\v", namePos ); - rwgtNames.push_back( std::string( opts.substr( namePos + 9, endName - namePos - 9 ) ) ); + rwgtNames->push_back( std::string( opts.substr( namePos + 9, endName - namePos - 9 ) ) ); } else { - rwgtNames.push_back( "rwgt_" + std::to_string( k + 1 ) ); + rwgtNames->push_back( "rwgt_" + std::to_string( k + 1 ) ); } } else { - rwgtNames.push_back( "rwgt_" + std::to_string( k + 1 ) ); + rwgtNames->push_back( "rwgt_" + std::to_string( k + 1 ) ); } - rwgtRuns[ rwgtRuns.size() - 1 ].rwgtName = rwgtNames[ rwgtNames.size() - 1 ]; + rwgtRuns[ rwgtRuns.size() - 1 ].rwgtName = rwgtNames->at( rwgtNames->size() - 1 ); } rwgtProcs = std::vector(); rwgtProcs.reserve( rwgtRuns.size() ); for( auto run : rwgtRuns ){ @@ -341,6 +343,7 @@ namespace REX::teaw rwgtSet = rwgts.rwgtSet; skeleton = rwgts.skeleton; eventFile = rwgts.eventFile; + flatWgts = rwgts.flatWgts; } REX::transSkel& rwgtCollection::getSkeleton(){ if( !this->skeleton ) @@ -373,6 +376,7 @@ namespace REX::teaw gS.push_back( vecOfVecs->at( 3*k + 1 ) ); momenta.push_back( vecOfVecs->at( 3*k + 2 ) ); } + flatWgts = eventFile.vectorFlat( wgts ); this->doublesSet = true; } void rwgtCollection::setSkeleton( std::vector& evSets ){ @@ -396,8 +400,10 @@ namespace REX::teaw gS.push_back( vecOfVecs->at( 3*k + 1 ) ); momenta.push_back( vecOfVecs->at( 3*k + 2 ) ); } + flatWgts = eventFile.vectorFlat( wgts ); this->doublesSet = true; } + std::shared_ptr> rwgtCollection::getNames(){ return rwgtSets->rwgtNames; } bool rwgtFiles::rwgtPulled(){ return (rewgtCard != nullptr); } bool rwgtFiles::slhaPulled(){ return (slhaCard != nullptr); } @@ -507,6 +513,10 @@ namespace REX::teaw this->meNormWgts = rwgts.meNormWgts; this->normWgt = rwgts.normWgt; this->rwgtGroup = rwgts.rwgtGroup; + this->normXSecs = rwgts.normXSecs; + this->errXSecs = rwgts.errXSecs; + this->ampNorm = 
rwgts.ampNorm; + this->reWgts = rwgts.reWgts; } // rwgtRunner::rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, // ampCall meCalcs ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ @@ -517,7 +527,9 @@ namespace REX::teaw bool rwgtRunner::singAmp(){ return (meInit && !meCompInit); } template void rwgtRunner::setMEs(Args&&... args){ - initCards(args...); + initCards(args...); + normXSecs = std::make_shared>( ); + errXSecs = std::make_shared>( ); if( !oneME() ) throw std::runtime_error( "No or multiple function(s) for evaluating scattering amplitudes has been provided." ); //ZW FIX THIS @@ -568,6 +580,35 @@ namespace REX::teaw } normWgt = eventFile.vectorFlat( meNormWgts ); } + void rwgtRunner::setAmpNorm( double precision ){ + if( this->ampNorm != 0.0 ){ return; } + auto xSecLines = this->lheFile->getInit()->getLines(); + if( xSecLines.size() > 1 ){ + std::cout << "\n\033[1;33mWarning: Multiple cross-section lines found in LHE file.\nteawREX only supports single (inclusive) process reweighting.\nWill proceed assuming all events belong to first process type.\033[0m\n"; + } + if( xSecLines.size() == 0 ) + throw std::runtime_error( "No cross-section information found in LHE file." ); + auto xSec = std::stod(std::string(xSecLines[0]->xsecup)); + double div = 0.0; + bool sameWeight = true; + for( size_t k = 1 ; k < this->flatWgts->size() - 1 ; k += size_t(flatWgts->size()/21) ){ + if( std::abs( flatWgts->at(0) - flatWgts->at(0) ) > precision ){ + sameWeight = false; + break; + } + } + if( sameWeight ){ + if( std::abs(xSec - flatWgts->at(0)) < precision ){ + this->ampNorm = double( 1 / flatWgts->size()); + return; + } + div = flatWgts->size() * flatWgts->at(0); + } + else{ + div = std::accumulate( flatWgts->begin(), flatWgts->end(), 0.0 ); + } + this->ampNorm = xSec / div; + } template void rwgtRunner::setNormWgts(Args&&... 
args){ if( !oneME() ){ setMEs(args...); } @@ -601,7 +642,8 @@ namespace REX::teaw newWGTs = REX::vecElemMult( *newMEs, *normWgt ); } //ZW IF MULTIPLE TYPES - REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); + reWgts->push_back( newWGTs ); + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), reWgts->at(reWgts->size() - 1) ); lheIn->addWgt( 0, nuWgt ); return true; } @@ -625,7 +667,8 @@ namespace REX::teaw newWGTs = REX::vecElemMult( *newMEs, *normWgt ); } //ZW IF MULTIPLE TYPES - REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); + reWgts->push_back( newWGTs ); + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), reWgts->at(reWgts->size() - 1), id ); lheIn->addWgt( 0, nuWgt ); return true; } @@ -650,7 +693,8 @@ namespace REX::teaw newWGTs = REX::vecElemMult( *newMEs, *normWgt ); } //ZW IF MULTIPLE TYPES - REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); + reWgts->push_back( newWGTs ); + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), reWgts->at(reWgts->size() - 1) ); lheIn->addWgt( 0, nuWgt ); return true; } @@ -675,7 +719,8 @@ namespace REX::teaw newWGTs = REX::vecElemMult( *newMEs, *normWgt ); } //ZW IF MULTIPLE TYPES - REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); + reWgts->push_back( newWGTs ); + REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), reWgts->at(reWgts->size() - 1), id ); lheIn->addWgt( 0, nuWgt ); return true; } @@ -685,20 +730,65 @@ namespace REX::teaw throw std::runtime_error( "Failed to write LHE file." ); return true; } - void rwgtRunner::runRwgt( const std::string& output ){ + bool rwgtRunner::calcXSecs(){ + if( normXSecs->size() != 0 ){ return true; } + if( ampNorm == 0.0 ) + throw std::runtime_error( "Normalisation factor for scattering amplitudes has not been calculated.\nReweighted LHE file has been written, but may contain errors." 
); + if( reWgts->size() == 0 ) + throw std::runtime_error( "No reweighting has been performed, or new weights have not been stored properly.\nReweighted LHE file has been written, but may contain errors." ); + for( size_t k = 0 ; k < reWgts->size() ; ++k ){ + normXSecs->push_back( ampNorm * std::accumulate( reWgts->at(k)->begin(), reWgts->at(k)->end(), 0.0 ) ); + } + return true; + } + bool rwgtRunner::calcXErrs(){ + if( errXSecs->size() != 0 ){ return true; } + if( reWgts->size() == 0 ) + throw std::runtime_error( "No reweighting has been performed, or new weights have not been stored properly.\nReweighted LHE file has been written, but may contain errors." ); + if( normXSecs->size() != reWgts->size() ) + throw std::runtime_error( "Different number of reweighted event sets and reweighted cross sections internally.\nReweighted LHE file has been written, but may contain errors." ); + double invN = 1. / double(reWgts->at(0)->size()); + double sqrtInvN = std::sqrt( invN ); + auto xSecLines = this->lheFile->getInit()->getLines(); + double xSec = std::stod(std::string(xSecLines[0]->xsecup)); + double xErr = std::stod(std::string(xSecLines[0]->xerrup)); + for( size_t k = 0 ; k < reWgts->size() ; ++k ){ + double xSecCurr = normXSecs->at(k); + auto wgts = reWgts->at(k); + double omega = 0.0; + double omegaSqr = 0.0; + for( auto wgt : *wgts ){ + double invWgt = 1. 
/ wgt; + omega += invWgt; + omegaSqr += invWgt * invWgt; + } + double var = (omegaSqr - omega * omega * invN) * invN * xSecCurr * xSecCurr; + errXSecs->push_back( std::sqrt( sqrtInvN * var )*xSec + xSecCurr * omega * invN * xErr ); + } + return true; + } + void rwgtRunner::runRwgt( const std::string& output, double precision ){ + reWgts = std::make_shared>>>( std::vector>>() ); + setAmpNorm( precision ); setMEs(); - setNormWgts(); - rwgtGroup = std::make_shared(); - auto currInd = lheFile->getHeader()->addWgtGroup( rwgtGroup ); - auto paramSets = rwgtSets->writeCards( *slhaParameters ); - for( size_t k = 0 ; k < paramSets.size(); k++ ){ - singleRwgtIter( paramSets[k], lheFile, k, rwgtSets->rwgtNames[k] ); + setNormWgts(); + rwgtGroup = std::make_shared(); + auto currInd = lheFile->getHeader()->addWgtGroup( rwgtGroup ); + auto paramSets = rwgtSets->writeCards( *slhaParameters ); + for( size_t k = 0 ; k < paramSets.size(); k++ ){ + singleRwgtIter( paramSets[k], lheFile, k, rwgtSets->rwgtNames->at(k) ); std::cout << "."; } - lheFileWriter( lheFile, output ); - REX::filePusher( slhaPath, *slhaCard ); + lheFileWriter( lheFile, output ); + REX::filePusher( slhaPath, *slhaCard ); std::cout << "\nReweighting done.\n"; } + std::shared_ptr> rwgtRunner::getReXSecs(){ + if(this->calcXSecs()){ return normXSecs; } + } + std::shared_ptr> rwgtRunner::getReXErrs(){ + if(this->calcXErrs()){ return errXSecs; } + } void rwgtRun( rwgtRunner& rwgt, const std::string& path ){ rwgt.runRwgt( path ); diff --git a/tools/REX/teawREX.h b/tools/REX/teawREX.h index 326d15de44..c2bb695213 100644 --- a/tools/REX/teawREX.h +++ b/tools/REX/teawREX.h @@ -77,12 +77,13 @@ namespace REX::teaw std::vector rwgtRuns; std::vector rwgtProcs; std::vector opts; - std::vector rwgtNames; + std::shared_ptr> rwgtNames; std::string_view srcCard; void parse( bool parseOnline = false ); rwgtCard( std::string_view reweight_card ); rwgtCard( std::string_view reweight_card, REX::lesHouchesCard slhaParams, bool 
parseOnline = false ); std::vector> writeCards( REX::lesHouchesCard& slhaOrig ); + std::shared_ptr> getNames(); }; @@ -103,6 +104,7 @@ namespace REX::teaw rwgtCollection(); rwgtCollection( std::shared_ptr lhe, std::shared_ptr slha, std::shared_ptr rwgts ); rwgtCollection( const rwgtCollection& rwgts ); + std::shared_ptr> getNames(); protected: template void setDoubles(Args&&... args); @@ -114,6 +116,7 @@ namespace REX::teaw std::vector>> wgts; std::vector>> gS; std::vector>> momenta; + std::shared_ptr> flatWgts; bool lheFileSet = false; bool slhaSet = false; bool rwgtSet = false; @@ -179,13 +182,20 @@ namespace REX::teaw std::vector meVec; std::vector>> initMEs; std::vector>> meNormWgts; + std::shared_ptr>>> reWgts; std::shared_ptr> normWgt; + double ampNorm = 0.0; + std::shared_ptr> normXSecs; + std::shared_ptr> errXSecs; std::shared_ptr rwgtGroup; template void setMEs(Args&&... args); + void setAmpNorm( double precision ); bool setParamCard( std::shared_ptr slhaParams ); void setNormWgtsSingleME(); void setNormWgtsMultiME(); + bool calcXSecs(); + bool calcXErrs(); template void setNormWgts(Args&&... 
args); bool singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheFile, size_t currId ); @@ -195,7 +205,9 @@ namespace REX::teaw std::string& id, REX::event& ev ); bool lheFileWriter( std::shared_ptr lheFile, std::string outputDir = "rwgt_evts.lhe" ); public: - void runRwgt( const std::string& output ); + void runRwgt( const std::string& output, double precision = 1e-6 ); + std::shared_ptr> getReXSecs(); + std::shared_ptr> getReXErrs(); }; From ad48bbc863fbcaa594c04e03373475e29dfba3a9 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 9 Sep 2024 11:28:17 +0200 Subject: [PATCH 22/76] updated submodule --- MG5aMC/mg5amcnlo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index 89822fa1a9..67ea52201f 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit 89822fa1a9ad49854794816a809ff828866773c3 +Subproject commit 67ea52201f2b4b2a519db87e58b43db3db2662a0 From 6c00da5705bb2b6a797cdf40cf7b1591c2c6fb30 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 9 Sep 2024 11:47:57 +0200 Subject: [PATCH 23/76] update submodule to merged upstream version --- MG5aMC/mg5amcnlo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index 67ea52201f..c9f67dd1ab 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit 67ea52201f2b4b2a519db87e58b43db3db2662a0 +Subproject commit c9f67dd1ab96fe04c70f7e12f024a092e37109a0 From 506b2e8242c1fb286fb9b153490f129da7e33470 Mon Sep 17 00:00:00 2001 From: Zenny Jovi Joestar Wettersten Date: Tue, 10 Sep 2024 10:25:00 +0200 Subject: [PATCH 24/76] reverted to earlier mg5 branch --- MG5aMC/mg5amcnlo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index c9f67dd1ab..67ea52201f 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit c9f67dd1ab96fe04c70f7e12f024a092e37109a0 +Subproject commit 
67ea52201f2b4b2a519db87e58b43db3db2662a0 From 52016c2bf62b9287dd708b6db23761339216fa31 Mon Sep 17 00:00:00 2001 From: Zenny Jovi Joestar Wettersten Date: Tue, 10 Sep 2024 14:55:17 +0200 Subject: [PATCH 25/76] modified makefiles to support gpu compilation, made cuda default target. does NOT automatically detect whether cuda is supported or not before defaulting to it -- backend needs to be modified directly to compile on other systems (right now) --- .../iolibs/template_files/gpu/GpuRuntime.h | 6 +- .../template_files/gpu/cudacpp_config.mk | 2 +- .../template_files/gpu/cudacpp_driver.mk | 14 +- .../template_files/gpu/cudacpp_runner.mk | 143 +++++++++--------- 4 files changed, 89 insertions(+), 76 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuRuntime.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuRuntime.h index 860c7fde16..862c2c963f 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuRuntime.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuRuntime.h @@ -38,7 +38,7 @@ namespace mg5amcGpu // *** FIXME! This will all need to be designed differently when going to multi-GPU nodes! 
*** struct GpuRuntime final { - GpuRuntime( const bool debug = true ) + GpuRuntime( const bool debug = false ) : m_debug( debug ) { setUp( m_debug ); } ~GpuRuntime() { tearDown( m_debug ); } GpuRuntime( const GpuRuntime& ) = delete; @@ -50,7 +50,7 @@ namespace mg5amcGpu // Set up CUDA application // ** NB: strictly speaking this is not needed when using the CUDA runtime API ** // Calling cudaSetDevice on startup is useful to properly book-keep the time spent in CUDA initialization - static void setUp( const bool debug = true ) + static void setUp( const bool debug = false ) { // ** NB: it is useful to call cudaSetDevice, or cudaFree, to properly book-keep the time spent in CUDA initialization // ** NB: otherwise, the first CUDA operation (eg a cudaMemcpyToSymbol in CPPProcess ctor) appears to take much longer! @@ -71,7 +71,7 @@ namespace mg5amcGpu // ** NB: strictly speaking this is not needed when using the CUDA runtime API ** // Calling cudaDeviceReset on shutdown is only needed for checking memory leaks in cuda-memcheck // See https://docs.nvidia.com/cuda/cuda-memcheck/index.html#leak-checking - static void tearDown( const bool debug = true ) + static void tearDown( const bool debug = false ) { if( debug ) std::cout << "__GpuRuntime: calling GpuDeviceReset()" << std::endl; checkGpu( gpuDeviceReset() ); diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk index 438fcd1661..1bdccc6654 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk @@ -10,7 +10,7 @@ # Set the default BACKEND (CUDA, HIP or C++/SIMD) choice ifeq ($(BACKEND),) - override BACKEND = cppauto + override BACKEND = cuda endif # Set the default FPTYPE (floating point type) 
choice diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk index e7ce3052d5..bb91250a95 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk @@ -111,7 +111,8 @@ export CXXFLAGS #=== (note, this is done also for C++, as NVTX and CURAND/ROCRAND are also needed by the C++ backends) # Set CUDA_HOME from the path to nvcc, if it exists -override CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null)) +#override CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null)) +CUDA_HOME := $(patsubst %/bin/nvcc,%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists override HIP_HOME = $(patsubst %%/bin/hipcc,%%,$(shell which hipcc 2>/dev/null)) @@ -257,10 +258,12 @@ else endif # Export GPUCC, GPUFLAGS, GPULANGUAGE, GPUSUFFIX (so that there is no need to check/define them again in cudacpp_src.mk) +export CUDA_HOME export GPUCC export GPUFLAGS export GPULANGUAGE export GPUSUFFIX +export XCOMPILERFLAG #------------------------------------------------------------------------------- @@ -783,7 +786,7 @@ $(gpu_rwgt): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX)) endif $(gpu_rwgt): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_rwgt): $(BUILDDIR)/$(BUILDDIR)/rwgt_driver.o $(rwgtlib) - $(GPUCC) -o $@ $(BUILDDIR)/rwgt_driver.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(gpu_proclibs) $(rwgtlib) + $(GPUCC) -o $@ $(BUILDDIR)/rwgt_driver.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) $(gpu_proclibs) $(rwgtlib) endif #------------------------------------------------------------------------------- @@ -1004,9 +1007,9 @@ endif 
#------------------------------------------------------------------------------- # Target: clean the builds -.PHONY: clean +.PHONY: clean clean-rwgtlib -clean: +clean: clean-rwgtlib ifeq ($(USEBUILDDIR),1) rm -rf $(BUILDDIR) else @@ -1016,6 +1019,9 @@ endif $(MAKE) -C ../src clean -f $(CUDACPP_SRC_MAKEFILE) ### rm -rf $(INCDIR) +clean-rwgtlib: + @for dir in $(DIRS); do $(MAKE) -C $$dir clean; done + cleanall: @echo $(MAKE) USEBUILDDIR=0 clean -f $(CUDACPP_MAKEFILE) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk index d8be8cd4e8..2ffeb72484 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk @@ -22,7 +22,7 @@ override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk include ../../src/cudacpp_config.mk # Export CUDACPP_BUILDDIR (so that there is no need to check/define it again in cudacpp_src.mk) -export CUDACPP_BUILDDIR +#export CUDACPP_BUILDDIR #------------------------------------------------------------------------------- @@ -84,10 +84,10 @@ endif #=== Configure the C++ compiler -CXXFLAGS = $(OPTFLAGS) -std=c++17 -Wall -Wshadow -Wextra -ifeq ($(shell $(CXX) --version | grep ^nvc++),) - CXXFLAGS += -ffast-math # see issue #117 -endif +#CXXFLAGS = $(OPTFLAGS) -std=c++17 -Wall -Wshadow -Wextra +#ifeq ($(shell $(CXX) --version | grep ^nvc++),) +# CXXFLAGS += -ffast-math # see issue #117 +#endif ###CXXFLAGS+= -Ofast # performance is not different from --fast-math ###CXXFLAGS+= -g # FOR DEBUGGING ONLY @@ -98,12 +98,12 @@ endif # See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html # Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" -ifneq ($(shell $(CXX) 
--version | egrep '^Apple clang'),) - CXXFLAGS += -mmacosx-version-min=11.3 -endif +#ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) +# CXXFLAGS += -mmacosx-version-min=11.3 +#endif # Export CXXFLAGS (so that there is no need to check/define it again in cudacpp_src.mk) -export CXXFLAGS +#export CXXFLAGS #------------------------------------------------------------------------------- @@ -111,10 +111,10 @@ export CXXFLAGS #=== (note, this is done also for C++, as NVTX and CURAND/ROCRAND are also needed by the C++ backends) # Set CUDA_HOME from the path to nvcc, if it exists -override CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null)) +#override CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null)) # Set HIP_HOME from the path to hipcc, if it exists -override HIP_HOME = $(patsubst %%/bin/hipcc,%%,$(shell which hipcc 2>/dev/null)) +#override HIP_HOME = $(patsubst %%/bin/hipcc,%%,$(shell which hipcc 2>/dev/null)) # Configure CUDA_INC (for CURAND and NVTX) and NVTX if a CUDA installation exists # (FIXME? Is there any equivalent of NVTX FOR HIP? What should be configured if both CUDA and HIP are installed?) @@ -144,22 +144,22 @@ endif #=== Configure the CUDA or HIP compiler (only for the CUDA and HIP backends) #=== (NB: throughout all makefiles, an empty GPUCC is used to indicate that this is a C++ build, i.e. that BACKEND is neither cuda nor hip!) 
-ifeq ($(BACKEND),cuda) +#ifeq ($(BACKEND),cuda) # If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA builds (issue #505) # This is because it is impossible to pass this to "GPUFLAGS += -ccbin " below - ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache " from outside - $(error BACKEND=$(BACKEND) but CUDA builds are not supported for multi-word CXX "$(CXX)") - endif +# ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache " from outside +# $(error BACKEND=$(BACKEND) but CUDA builds are not supported for multi-word CXX "$(CXX)") +# endif # Set GPUCC as $(CUDA_HOME)/bin/nvcc (it was already checked above that this exists) - GPUCC = $(CUDA_HOME)/bin/nvcc - XCOMPILERFLAG = -Xcompiler - GPULANGUAGE = cu - GPUSUFFIX = cuda +# GPUCC = $(CUDA_HOME)/bin/nvcc +# XCOMPILERFLAG = -Xcompiler +# GPULANGUAGE = cu +# GPUSUFFIX = cuda # Basic compiler flags (optimization and includes) - GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) +# GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html @@ -168,31 +168,31 @@ ifeq ($(BACKEND),cuda) # This will embed device code for 70, and PTX for 70+. # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). 
- MADGRAPH_CUDA_ARCHITECTURE ?= 70 +# MADGRAPH_CUDA_ARCHITECTURE ?= 70 ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 - comma:=, - GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) - GPUFLAGS += $(GPUARCHFLAGS) +# comma:=, +# GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) +# GPUFLAGS += $(GPUARCHFLAGS) # Other NVidia-specific flags - CUDA_OPTFLAGS = -lineinfo - GPUFLAGS += $(CUDA_OPTFLAGS) +# CUDA_OPTFLAGS = -lineinfo +# GPUFLAGS += $(CUDA_OPTFLAGS) # NVCC version ###GPUCC_VERSION = $(shell $(GPUCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1) # Fast math - GPUFLAGS += -use_fast_math +# GPUFLAGS += -use_fast_math # Extra build warnings ###GPUFLAGS += $(XCOMPILERFLAG) -Wall $(XCOMPILERFLAG) -Wextra $(XCOMPILERFLAG) -Wshadow # CUDA includes and NVTX - GPUFLAGS += $(CUDA_INC) $(USE_NVTX) +# GPUFLAGS += $(CUDA_INC) $(USE_NVTX) # C++ standard - GPUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h +# GPUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h # For nvcc, use -maxrregcount to control the maximum number of registries (this does not exist in hipcc) # Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12) @@ -203,81 
+203,81 @@ ifeq ($(BACKEND),cuda) # Set the host C++ compiler for nvcc via "-ccbin " # (NB issue #505: this must be a single word, "clang++ --gcc-toolchain..." is not supported) - GPUFLAGS += -ccbin $(shell which $(subst ccache ,,$(CXX))) +# GPUFLAGS += -ccbin $(shell which $(subst ccache ,,$(CXX))) # Allow newer (unsupported) C++ compilers with older versions of CUDA if ALLOW_UNSUPPORTED_COMPILER_IN_CUDA is set (#504) - ifneq ($(origin ALLOW_UNSUPPORTED_COMPILER_IN_CUDA),undefined) - GPUFLAGS += -allow-unsupported-compiler - endif +# ifneq ($(origin ALLOW_UNSUPPORTED_COMPILER_IN_CUDA),undefined) +# GPUFLAGS += -allow-unsupported-compiler +# endif -else ifeq ($(BACKEND),hip) +#else ifeq ($(BACKEND),hip) # Set GPUCC as $(HIP_HOME)/bin/hipcc (it was already checked above that this exists) - GPUCC = $(HIP_HOME)/bin/hipcc - XCOMPILERFLAG = - GPULANGUAGE = hip - GPUSUFFIX = hip +# GPUCC = $(HIP_HOME)/bin/hipcc +# XCOMPILERFLAG = +# GPULANGUAGE = hip +# GPUSUFFIX = hip # Basic compiler flags (optimization and includes) - GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) +# GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # AMD HIP architecture flags - GPUARCHFLAGS = --offload-arch=gfx90a - GPUFLAGS += $(GPUARCHFLAGS) +# GPUARCHFLAGS = --offload-arch=gfx90a +# GPUFLAGS += $(GPUARCHFLAGS) # Other AMD-specific flags - GPUFLAGS += -target x86_64-linux-gnu -DHIP_PLATFORM=amd +# GPUFLAGS += -target x86_64-linux-gnu -DHIP_PLATFORM=amd # Fast math (is -DHIP_FAST_MATH equivalent to -ffast-math?) 
- GPUFLAGS += -DHIP_FAST_MATH + # GPUFLAGS += -DHIP_FAST_MATH # Extra build warnings ###GPUFLAGS += $(XCOMPILERFLAG) -Wall $(XCOMPILERFLAG) -Wextra $(XCOMPILERFLAG) -Wshadow # HIP includes - HIP_INC = -I$(HIP_HOME)/include/ - GPUFLAGS += $(HIP_INC) + # HIP_INC = -I$(HIP_HOME)/include/ + # GPUFLAGS += $(HIP_INC) # C++ standard - GPUFLAGS += -std=c++17 + # GPUFLAGS += -std=c++17 -else +#else # Backend is neither cuda nor hip - override GPUCC= - override GPUFLAGS= +# override GPUCC= +# override GPUFLAGS= # Sanity check, this should never happen: if GPUCC is empty, then this is a C++ build, i.e. BACKEND is neither cuda nor hip. # In practice, in the following, "ifeq ($(GPUCC),)" is equivalent to "ifneq ($(findstring cpp,$(BACKEND)),)". # Conversely, note that GPUFLAGS is non-empty also for C++ builds, but it is never used in that case. - ifeq ($(findstring cpp,$(BACKEND)),) - $(error INTERNAL ERROR! Unknown backend BACKEND='$(BACKEND)': supported backends are $(foreach backend,$(SUPPORTED_BACKENDS),'$(backend)')) - endif +# ifeq ($(findstring cpp,$(BACKEND)),) +# $(error INTERNAL ERROR! 
Unknown backend BACKEND='$(BACKEND)': supported backends are $(foreach backend,$(SUPPORTED_BACKENDS),'$(backend)')) +# endif -endif +#endif # Export GPUCC, GPUFLAGS, GPULANGUAGE, GPUSUFFIX (so that there is no need to check/define them again in cudacpp_src.mk) -export GPUCC -export GPUFLAGS -export GPULANGUAGE -export GPUSUFFIX +#export GPUCC +#export GPUFLAGS +#export GPULANGUAGE +#export GPUSUFFIX #------------------------------------------------------------------------------- #=== Configure ccache for C++ and CUDA/HIP builds # Enable ccache if USECCACHE=1 -ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) - override CXX:=ccache $(CXX) -endif +#ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) +# override CXX:=ccache $(CXX) +#endif #ifeq ($(USECCACHE)$(shell echo $(AR) | grep ccache),1) # override AR:=ccache $(AR) #endif -ifneq ($(GPUCC),) - ifeq ($(USECCACHE)$(shell echo $(GPUCC) | grep ccache),1) - override GPUCC:=ccache $(GPUCC) - endif -endif +#ifneq ($(GPUCC),) +# ifeq ($(USECCACHE)$(shell echo $(GPUCC) | grep ccache),1) +# override GPUCC:=ccache $(GPUCC) +# endif +#endif #------------------------------------------------------------------------------- @@ -784,9 +784,16 @@ endif $(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) -gpu_rwgtfiles := $(BUILDDIR)/rwgt_runner.o $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(gpu_objects_exe) +ifneq ($(shell $(CXX) --version | grep ^Intel),) +$(gpu_rwgtlib): LIBFLAGS += -lintlc # compile with 
icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') +$(gpu_rwgtlib): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') +else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 +$(gpu_rwgtlib): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc +endif +$(gpu_rwgtlib): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH +gpu_rwgtfiles := $(BUILDDIR)/rwgt_runner_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(gpu_rwgtlib): $(gpu_rwgtfiles) $(gpu_objects_lib) - $(GPUCC) -shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_GPULIB) + $(GPUCC) -shared -o $@ $(BUILDDIR)/rwgt_runner_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- @@ -1013,7 +1020,7 @@ clean: ifeq ($(USEBUILDDIR),1) rm -rf $(BUILDDIR) else - rm -f $(BUILDDIR)/.build.* $(BUILDDIR)/*.o $(BUILDDIR)/*.exe + rm -f $(BUILDDIR)/.build.* $(BUILDDIR)/*.o $(BUILDDIR)/*.so $(BUILDDIR)/*.exe rm -f $(LIBDIR)/lib*.so endif $(MAKE) -C ../../src clean -f $(CUDACPP_SRC_MAKEFILE) From 00a081c43b08962c515c9510ce2074b8629fb1ff Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 16 Sep 2024 14:25:07 +0200 Subject: [PATCH 26/76] changed default backend back to cppauto for testing --- MG5aMC/mg5amcnlo | 2 +- .../madgraph/iolibs/template_files/gpu/cudacpp_config.mk | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index 67ea52201f..89a47f894b 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit 
67ea52201f2b4b2a519db87e58b43db3db2662a0 +Subproject commit 89a47f894bba090c60132735ecdcdb6d81618e9a diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk index 1bdccc6654..438fcd1661 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk @@ -10,7 +10,7 @@ # Set the default BACKEND (CUDA, HIP or C++/SIMD) choice ifeq ($(BACKEND),) - override BACKEND = cuda + override BACKEND = cppauto endif # Set the default FPTYPE (floating point type) choice From 06bd58f758f1233b2d1e73a110816fb5701c5f52 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 16 Sep 2024 15:43:20 +0200 Subject: [PATCH 27/76] fixed submodule link --- MG5aMC/mg5amcnlo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index 89a47f894b..d7f61db844 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit 89a47f894bba090c60132735ecdcdb6d81618e9a +Subproject commit d7f61db844bcd8c0741f777c3fdf0099c6ed1331 From a166e747fb7a8bd01d2146aeb6d426bdbbf099cc Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Tue, 17 Sep 2024 10:03:32 +0200 Subject: [PATCH 28/76] separated REX and teawREX compilations --- .../template_files/gpu/cudacpp_driver.mk | 228 +----------------- .../template_files/gpu/cudacpp_rex_src.mk | 2 +- .../template_files/gpu/cudacpp_runner.mk | 199 --------------- .../CUDACPP_SA_OUTPUT/model_handling.py | 6 +- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 4 +- tools/REX/REX.cc | 28 +-- tools/REX/REX.h | 24 +- tools/REX/teawREX.cc | 19 +- 8 files changed, 51 insertions(+), 459 deletions(-) diff --git 
a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk index bb91250a95..72360410f9 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk @@ -662,25 +662,9 @@ $(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fn endif # # Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) -# $(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/rwgt_driver_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/rwgt_driver_gpu.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) -# # Apply special build flags only to check_sa_.o and (Cu|Hip)randRandomNumberKernel_.o -# $(BUILDDIR)/check_sa_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) -# $(BUILDDIR)/check_sa_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -# $(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) -# $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -# $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(RNDCXXFLAGS) -# $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o: GPUFLAGS += $(RNDCXXFLAGS) -# ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -# $(BUILDDIR)/CurandRandomNumberKernel_cpp.o: CXXFLAGS += $(CUDA_INC) -# endif -# ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -# $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o: CXXFLAGS += $(HIP_INC) -# endif - # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... 
instead" in GPUCC with icx2023 (#592) ifneq ($(shell $(CXX) --version | egrep '^(Intel)'),) ifneq ($(GPUCC),) @@ -688,21 +672,6 @@ GPUFLAGS += -Wno-deprecated-builtins endif endif -# Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) -# This patch does remove the warning, but I prefer to keep it disabled for the moment... -###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option -###ifneq ($(GPUCC),) -###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option -###endif -###endif - -#### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto - -#### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) - # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! 
add it only for NVTX or curand #679) $(BUILDDIR)/%%_cpp.o : %%.cc *.h ../src/*.h $(BUILDDIR)/.build.$(TAG) @@ -719,51 +688,10 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): common (src) library -commonlib : $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so +# commonlib : $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../src/*.h ../src/*.cc $(BUILDDIR)/.build.$(TAG) - $(MAKE) -C ../src $(MAKEDEBUG) -f $(CUDACPP_SRC_MAKEFILE) - -#------------------------------------------------------------------------------- - -# processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') -# ###$(info processid_short=$(processid_short)) - -# MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -# cxx_objects_lib=$(BUILDDIR)/CPPProcess_cpp.o $(BUILDDIR)/MatrixElementKernels_cpp.o $(BUILDDIR)/BridgeKernels_cpp.o $(BUILDDIR)/CrossSectionKernels_cpp.o -# cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cpp.o $(BUILDDIR)/RamboSamplingKernels_cpp.o - -# ifneq ($(GPUCC),) -# MG5AMC_GPULIB = mg5amc_$(processid_short)_$(GPUSUFFIX) -# gpu_objects_lib=$(BUILDDIR)/CPPProcess_$(GPUSUFFIX).o $(BUILDDIR)/MatrixElementKernels_$(GPUSUFFIX).o $(BUILDDIR)/BridgeKernels_$(GPUSUFFIX).o $(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o -# gpu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/RamboSamplingKernels_$(GPUSUFFIX).o -# endif - -# # Target (and build rules): C++ and CUDA/HIP shared libraries -# $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge_cpp.o -# $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge_cpp.o -# $(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) -# $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) - -# ifneq ($(GPUCC),) -# $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(BUILDDIR)/fbridge_$(GPUSUFFIX).o -# $(LIBDIR)/lib$(MG5AMC_GPULIB).so: gpu_objects_lib += 
$(BUILDDIR)/fbridge_$(GPUSUFFIX).o -# $(LIBDIR)/lib$(MG5AMC_GPULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) -# $(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# # Bypass std::filesystem completely to ease portability on LUMI #803 -# #ifneq ($(findstring hipcc,$(GPUCC)),) -# # $(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs -# #else -# # $(GPUCC) --shared -o $@ $(gpu_objects_lib) $(GPULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# #endif -# endif - -#------------------------------------------------------------------------------- - -# Target (and build rules): Fortran include files -###$(INCDIR)/%%.inc : ../%%.inc -### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi -### \cp $< $@ +# $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../src/*.h ../src/*.cc $(BUILDDIR)/.build.$(TAG) +# $(MAKE) -C ../src $(MAKEDEBUG) -f $(CUDACPP_SRC_MAKEFILE) #------------------------------------------------------------------------------- @@ -796,156 +724,6 @@ $(BUILDDIR)/%%_fortran.o : %%.f *.inc @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi $(FC) -I. -c $< -o $@ -# Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%%_fortran.o : %%.f *.inc -### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi -### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi -### $(FC) -I. 
-I$(INCDIR) -c $< -o $@ - -# Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc - -# ifeq ($(UNAME_S),Darwin) -# $(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 -# endif -# $(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -# $(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) -# ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 -# $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ -# else -# $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -# endif - -# ifneq ($(GPUCC),) -# ifneq ($(shell $(CXX) --version | grep ^Intel),) -# $(gpu_fcheckmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -# $(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') -# endif -# ifeq ($(UNAME_S),Darwin) -# $(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 -# endif -# $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -# $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) -# ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 -# $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) 
-lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 -# else -# $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -# endif -# endif - -#------------------------------------------------------------------------------- - -# # Target (and build rules): test objects and test executable -# ifeq ($(GPUCC),) -# $(BUILDDIR)/testxxx_cpp.o: $(GTESTLIBS) -# $(BUILDDIR)/testxxx_cpp.o: INCFLAGS += $(GTESTINC) -# $(BUILDDIR)/testxxx_cpp.o: testxxx_cc_ref.txt -# $(cxx_testmain): $(BUILDDIR)/testxxx_cpp.o -# $(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testxxx_cpp.o # Comment out this line to skip the C++ test of xxx functions -# else -# $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: $(GTESTLIBS) -# $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -# $(BUILDDIR)/testxxx_$(GPUSUFFIX).o: testxxx_cc_ref.txt -# $(gpu_testmain): $(BUILDDIR)/testxxx_$(GPUSUFFIX).o -# $(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testxxx_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP test of xxx functions -# endif - -# ifneq ($(UNAME_S),Darwin) # Disable testmisc on Darwin (workaround for issue #838) -# ifeq ($(GPUCC),) -# $(BUILDDIR)/testmisc_cpp.o: $(GTESTLIBS) -# $(BUILDDIR)/testmisc_cpp.o: INCFLAGS += $(GTESTINC) -# $(cxx_testmain): $(BUILDDIR)/testmisc_cpp.o -# $(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/testmisc_cpp.o # Comment out this line to skip the C++ miscellaneous tests -# else -# $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: $(GTESTLIBS) -# $(BUILDDIR)/testmisc_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -# $(gpu_testmain): $(BUILDDIR)/testmisc_$(GPUSUFFIX).o -# $(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/testmisc_$(GPUSUFFIX).o # Comment out this line to skip the CUDA/HIP miscellaneous tests -# endif -# endif - -# ifeq ($(GPUCC),) -# $(BUILDDIR)/runTest_cpp.o: $(GTESTLIBS) -# $(BUILDDIR)/runTest_cpp.o: INCFLAGS += $(GTESTINC) -# 
$(cxx_testmain): $(BUILDDIR)/runTest_cpp.o -# $(cxx_testmain): cxx_objects_exe += $(BUILDDIR)/runTest_cpp.o -# else -# $(BUILDDIR)/runTest_$(GPUSUFFIX).o: $(GTESTLIBS) -# $(BUILDDIR)/runTest_$(GPUSUFFIX).o: INCFLAGS += $(GTESTINC) -# ifneq ($(shell $(CXX) --version | grep ^Intel),) -# $(gpu_testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -# $(gpu_testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') -# else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -# $(gpu_testmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc -# endif -# $(gpu_testmain): $(BUILDDIR)/runTest_$(GPUSUFFIX).o -# $(gpu_testmain): gpu_objects_exe += $(BUILDDIR)/runTest_$(GPUSUFFIX).o -# endif - -# ifeq ($(GPUCC),) -# $(cxx_testmain): $(GTESTLIBS) -# $(cxx_testmain): INCFLAGS += $(GTESTINC) -# $(cxx_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc -# else -# $(gpu_testmain): $(GTESTLIBS) -# $(gpu_testmain): INCFLAGS += $(GTESTINC) -# $(gpu_testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest # adding also -lgtest_main is no longer necessary since we added main() to testxxx.cc -# endif - -# ifeq ($(GPUCC),) # if at all, OMP is used only in CXX builds (not in GPU builds) -# ifneq ($(OMPFLAGS),) -# ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -# $(cxx_testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) -# else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -# $(cxx_testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 -# ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -# ###$(cxx_testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) -# else -# $(cxx_testmain): LIBFLAGS += -lgomp -# endif -# endif -# endif - -# # Test quadmath in testmisc.cc tests for constexpr_math #627 -# ###ifeq ($(GPUCC),) -# ###$(cxx_testmain): LIBFLAGS += -lquadmath -# ###else -# ###$(gpu_testmain): LIBFLAGS += -lquadmath -# ###endif - -# # Bypass std::filesystem completely to ease portability on LUMI #803 -# ###ifneq ($(findstring hipcc,$(GPUCC)),) -# ###$(gpu_testmain): LIBFLAGS += -lstdc++fs -# ###endif - -# ifeq ($(GPUCC),) # link only runTest_cpp.o -# $(cxx_testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -# $(cxx_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) -# $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -# else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both runTest_cpp.o and runTest_$(GPUSUFFIX).o) -# $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -# $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) -# ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 -# $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 -# else -# $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda -# endif -# endif - -# # Use target gtestlibs to build only googletest -# ifneq ($(GTESTLIBS),) -# gtestlibs: $(GTESTLIBS) -# endif - -# # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 -# $(GTESTLIBS): -# ifneq ($(shell which flock 2>/dev/null),) -# @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi -# flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) -# else -# if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi -# endif - #------------------------------------------------------------------------------- # Target: build all targets in all BACKEND modes (each BACKEND mode in a separate build directory) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_src.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_src.mk index dedc398029..9725eae803 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_src.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_src.mk @@ -144,7 +144,7 @@ endif #------------------------------------------------------------------------------- cxx_objects=$(addprefix $(BUILDDIR)/, read_slha_cpp.o) -#cxx_objects+=$(addprefix $(BUILDDIR)/, REX_cpp.o) # ZW: not all functionality from REX needed for teawREX is in the header, so for now just include REX.cc in teawREX.cc +cxx_objects+=$(addprefix $(BUILDDIR)/, REX_cpp.o) # ZW: not all functionality from REX needed for teawREX is in the header, so for now just include REX.cc in teawREX.cc cxx_objects+=$(addprefix $(BUILDDIR)/, teawREX_cpp.o) cxx_objects+=$(addprefix $(BUILDDIR)/, rwgt_instance_cpp.o) ifeq ($(GPUCC),) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk index 2ffeb72484..8b48c30781 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk @@ -82,31 +82,6 @@ endif 
#------------------------------------------------------------------------------- -#=== Configure the C++ compiler - -#CXXFLAGS = $(OPTFLAGS) -std=c++17 -Wall -Wshadow -Wextra -#ifeq ($(shell $(CXX) --version | grep ^nvc++),) -# CXXFLAGS += -ffast-math # see issue #117 -#endif -###CXXFLAGS+= -Ofast # performance is not different from --fast-math -###CXXFLAGS+= -g # FOR DEBUGGING ONLY - -# Optionally add debug flags to display the full list of flags (eg on Darwin) -###CXXFLAGS+= -v - -# Note: AR, CXX and FC are implicitly defined if not set externally -# See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html - -# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" -#ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -# CXXFLAGS += -mmacosx-version-min=11.3 -#endif - -# Export CXXFLAGS (so that there is no need to check/define it again in cudacpp_src.mk) -#export CXXFLAGS - -#------------------------------------------------------------------------------- - #=== Configure the GPU compiler (CUDA or HIP) #=== (note, this is done also for C++, as NVTX and CURAND/ROCRAND are also needed by the C++ backends) @@ -126,160 +101,6 @@ else override CUDA_INC= endif -# NB: NEW LOGIC FOR ENABLING AND DISABLING CUDA OR HIP BUILDS (AV Feb-Mar 2024) -# - In the old implementation, by default the C++ targets for one specific AVX were always built together with either CUDA or HIP. -# If both CUDA and HIP were installed, then CUDA took precedence over HIP, and the only way to force HIP builds was to disable -# CUDA builds by setting CUDA_HOME to an invalid value (as CUDA_HOME took precdence over PATH to find the installation of nvcc). -# Similarly, C++-only builds could be forced by setting CUDA_HOME and/or HIP_HOME to invalid values. A check for an invalid nvcc -# in CUDA_HOME or an invalid hipcc HIP_HOME was necessary to ensure this logic, and had to be performed at the very beginning. 
-# - In the new implementation (PR #798), separate individual builds are performed for one specific C++/AVX mode, for CUDA or -# for HIP. The choice of the type of build is taken depending on the value of the BACKEND variable (replacing the AVX variable). -# Unlike what happened in the past, nvcc and hipcc must have already been added to PATH. Using 'which nvcc' and 'which hipcc', -# their existence and their location is checked, and the variables CUDA_HOME and HIP_HOME are internally set by this makefile. -# This must be still done before backend-specific customizations, e.g. because CURAND and NVTX are also used in C++ builds. -# Note also that a preliminary check for nvcc and hipcc if BACKEND is cuda or hip is performed in cudacpp_config.mk. -# - Note also that the REQUIRE_CUDA variable (which was used in the past, e.g. for CI tests on GPU #443) is now (PR #798) no -# longer necessary, as it is now equivalent to BACKEND=cuda. Similarly, there is no need to introduce a REQUIRE_HIP variable. - -#=== Configure the CUDA or HIP compiler (only for the CUDA and HIP backends) -#=== (NB: throughout all makefiles, an empty GPUCC is used to indicate that this is a C++ build, i.e. that BACKEND is neither cuda nor hip!) 
- -#ifeq ($(BACKEND),cuda) - - # If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA builds (issue #505) - # This is because it is impossible to pass this to "GPUFLAGS += -ccbin " below -# ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache " from outside -# $(error BACKEND=$(BACKEND) but CUDA builds are not supported for multi-word CXX "$(CXX)") -# endif - - # Set GPUCC as $(CUDA_HOME)/bin/nvcc (it was already checked above that this exists) -# GPUCC = $(CUDA_HOME)/bin/nvcc -# XCOMPILERFLAG = -Xcompiler -# GPULANGUAGE = cu -# GPUSUFFIX = cuda - - # Basic compiler flags (optimization and includes) -# GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) - - # NVidia CUDA architecture flags - # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html - # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # This will embed device code for 70, and PTX for 70+. - # One may pass MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to the make command to use another value or list of values (see #533). - # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). 
-# MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###GPUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###GPUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 -# comma:=, -# GPUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) -# GPUFLAGS += $(GPUARCHFLAGS) - - # Other NVidia-specific flags -# CUDA_OPTFLAGS = -lineinfo -# GPUFLAGS += $(CUDA_OPTFLAGS) - - # NVCC version - ###GPUCC_VERSION = $(shell $(GPUCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1) - - # Fast math -# GPUFLAGS += -use_fast_math - - # Extra build warnings - ###GPUFLAGS += $(XCOMPILERFLAG) -Wall $(XCOMPILERFLAG) -Wextra $(XCOMPILERFLAG) -Wshadow - - # CUDA includes and NVTX -# GPUFLAGS += $(CUDA_INC) $(USE_NVTX) - - # C++ standard -# GPUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h - - # For nvcc, use -maxrregcount to control the maximum number of registries (this does not exist in hipcc) - # Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12) - ###GPUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12) - ###GPUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12) - ###GPUFLAGS+= --maxrregcount 96 # degrades throughput: 4.1E8 (16384 32 12) up to 4.5E8 (65536 128 12) - ###GPUFLAGS+= --maxrregcount 64 # degrades throughput: 1.7E8 (16384 32 12) flat at 1.7E8 (65536 128 12) - - # Set the host C++ 
compiler for nvcc via "-ccbin " - # (NB issue #505: this must be a single word, "clang++ --gcc-toolchain..." is not supported) -# GPUFLAGS += -ccbin $(shell which $(subst ccache ,,$(CXX))) - - # Allow newer (unsupported) C++ compilers with older versions of CUDA if ALLOW_UNSUPPORTED_COMPILER_IN_CUDA is set (#504) -# ifneq ($(origin ALLOW_UNSUPPORTED_COMPILER_IN_CUDA),undefined) -# GPUFLAGS += -allow-unsupported-compiler -# endif - -#else ifeq ($(BACKEND),hip) - - # Set GPUCC as $(HIP_HOME)/bin/hipcc (it was already checked above that this exists) -# GPUCC = $(HIP_HOME)/bin/hipcc -# XCOMPILERFLAG = -# GPULANGUAGE = hip -# GPUSUFFIX = hip - - # Basic compiler flags (optimization and includes) -# GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) - - # AMD HIP architecture flags -# GPUARCHFLAGS = --offload-arch=gfx90a -# GPUFLAGS += $(GPUARCHFLAGS) - - # Other AMD-specific flags -# GPUFLAGS += -target x86_64-linux-gnu -DHIP_PLATFORM=amd - - # Fast math (is -DHIP_FAST_MATH equivalent to -ffast-math?) - # GPUFLAGS += -DHIP_FAST_MATH - - # Extra build warnings - ###GPUFLAGS += $(XCOMPILERFLAG) -Wall $(XCOMPILERFLAG) -Wextra $(XCOMPILERFLAG) -Wshadow - - # HIP includes - # HIP_INC = -I$(HIP_HOME)/include/ - # GPUFLAGS += $(HIP_INC) - - # C++ standard - # GPUFLAGS += -std=c++17 - -#else - - # Backend is neither cuda nor hip -# override GPUCC= -# override GPUFLAGS= - - # Sanity check, this should never happen: if GPUCC is empty, then this is a C++ build, i.e. BACKEND is neither cuda nor hip. - # In practice, in the following, "ifeq ($(GPUCC),)" is equivalent to "ifneq ($(findstring cpp,$(BACKEND)),)". - # Conversely, note that GPUFLAGS is non-empty also for C++ builds, but it is never used in that case. -# ifeq ($(findstring cpp,$(BACKEND)),) -# $(error INTERNAL ERROR! 
Unknown backend BACKEND='$(BACKEND)': supported backends are $(foreach backend,$(SUPPORTED_BACKENDS),'$(backend)')) -# endif - -#endif - -# Export GPUCC, GPUFLAGS, GPULANGUAGE, GPUSUFFIX (so that there is no need to check/define them again in cudacpp_src.mk) -#export GPUCC -#export GPUFLAGS -#export GPULANGUAGE -#export GPUSUFFIX - -#------------------------------------------------------------------------------- - -#=== Configure ccache for C++ and CUDA/HIP builds - -# Enable ccache if USECCACHE=1 -#ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) -# override CXX:=ccache $(CXX) -#endif -#ifeq ($(USECCACHE)$(shell echo $(AR) | grep ccache),1) -# override AR:=ccache $(AR) -#endif -#ifneq ($(GPUCC),) -# ifeq ($(USECCACHE)$(shell echo $(GPUCC) | grep ccache),1) -# override GPUCC:=ccache $(GPUCC) -# endif -#endif - -#------------------------------------------------------------------------------- #=== Configure common compiler flags for C++ and CUDA/HIP @@ -350,11 +171,6 @@ endif ifeq ($(UNAME_P),ppc64le) CXXFLAGS+= -mcpu=power9 -mtune=power9 # gains ~2-3%% both for cppnone and cppsse4 # Throughput references without the extra flags below: cppnone=1.41-1.42E6, cppsse4=2.15-2.19E6 - ###CXXFLAGS+= -DNO_WARN_X86_INTRINSICS # no change - ###CXXFLAGS+= -fpeel-loops # no change - ###CXXFLAGS+= -funroll-loops # gains ~1%% for cppnone, loses ~1%% for cppsse4 - ###CXXFLAGS+= -ftree-vectorize # no change - ###CXXFLAGS+= -flto # would increase to cppnone=4.08-4.12E6, cppsse4=4.99-5.03E6! else ###CXXFLAGS+= -flto # also on Intel this would increase throughputs by a factor 2 to 4... ######CXXFLAGS+= -fno-semantic-interposition # no benefit (neither alone, nor combined with -flto) @@ -684,21 +500,6 @@ GPUFLAGS += -Wno-deprecated-builtins endif endif -# Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) -# This patch does remove the warning, but I prefer to keep it disabled for the moment... 
-###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -Wno-overriding-t-option -###ifneq ($(GPUCC),) -###$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -Wno-overriding-t-option -###endif -###endif - -#### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += -flto - -#### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess_cpp.o: CXXFLAGS += $(AVXFLAGS) - # Generic target and build rules: objects from C++ compilation # (NB do not include CUDA_INC here! add it only for NVTX or curand #679) $(BUILDDIR)/%%_cpp.o : %%.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 041b8089d7..27acc6491b 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -2289,10 +2289,10 @@ def edit_rwgt_runner(self): # are not used in the REX reweighting def generate_process_files(self): """Generate mgOnGpuConfig.h, CPPProcess.cc, CPPProcess.h, check_sa.cc, gXXX.cu links""" - misc.sprint('Entering RWGT_OneProcessExporter.generate_process_files') + # misc.sprint('Entering RWGT_OneProcessExporter.generate_process_files') super().generate_process_files() - misc.sprint('Generating rwgt_runner files') + # misc.sprint('Generating rwgt_runner files') self.edit_rwgt_header() self.edit_rwgt_runner() - misc.sprint('Finished generating rwgt files') + # misc.sprint('Finished generating rwgt files') diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index b6f65dc0b1..20b5846555 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -601,8 +601,8 @@ def generate_subprocess_directory(self, matrix_element, cpp_helas_call_writer, return def export_driver(self): - misc.sprint("In export_driver") - misc.sprint("Current working directory is: %s" % self.dir_path) + # misc.sprint("In export_driver") + # misc.sprint("Current working directory is: %s" % self.dir_path) replace_dict = {} replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() replace_dict['multiprocess_lines'] = "\n".join(self.proc_lines) diff --git a/tools/REX/REX.cc b/tools/REX/REX.cc index c445493f6e..885623ce2e 100644 --- a/tools/REX/REX.cc +++ b/tools/REX/REX.cc @@ -716,9 +716,9 @@ namespace REX std::string_view headWeight::getTag(){ return idTag; } bool headWeight::hasTag(){ return (idTag.size() > 0); } headWeight::headWeight(){ name = "weight"; return; } - headWeight::headWeight( std::string_view paramSet, const size_t& begin ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; return; } + headWeight::headWeight( std::string_view paramSet, const size_t& begin ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet.substr(begin); return; } headWeight::headWeight( std::string_view paramSet, std::string_view idText, int idNo, const size_t& begin ) : xmlNode(){ - name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; + name = "weight"; xmlFile = paramSet; content = paramSet.substr(begin); idTag = idText; id = idNo; } headWeight::headWeight( xmlNode& node ) : xmlNode( node ){ parser( false ); @@ -781,7 +781,7 @@ namespace REX } } headWeight::headWeight( std::string_view paramSet, std::string& idText, unsigned int idNo, const size_t& begin ) : xmlNode(){ - name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; + name = "weight"; xmlFile = paramSet; content = paramSet.substr(begin); idTag = idText; id = idNo; } headWeight::headWeight( std::string_view paramSet, std::string& 
idText){ name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; @@ -1404,7 +1404,7 @@ namespace REX prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); } } - event::event( const event& original ){ + event::event( const event& original ) : xmlNode( original ){ this->rwgt = original.rwgt; this->header = original.header; this->prts = original.prts; @@ -3421,7 +3421,7 @@ namespace REX std::shared_ptr> transLHE::vectorFlat( std::vector>> vecVec ) { bool allRel = (vecVec.size() == relProcs.size()); - bool justRel = (vecVec.size() == std::count(relEvSets.begin(), relEvSets.end(), true)); + bool justRel = (vecVec.size() == size_t(std::count(relEvSets.begin(), relEvSets.end(), true))); std::vector relInds; if( !(allRel || justRel) ) throw std::range_error("vectorFlat: input vector size does not match number of (relevant) subprocesses"); for( size_t k = 0; k < relEvSets.size(); ++k ) @@ -3469,15 +3469,15 @@ namespace REX // ZW: templated fcn for multiplying two vectors elementwise, // assuming T has a multiplication operator* - template - std::shared_ptr> vecElemMult( const std::vector& vec1, const std::vector& vec2){ - if( vec1.size() < vec2.size() ){ return vecElemMult( vec2, vec1 ); } - auto valVec = std::make_shared>( vec1.size() ); - std::transform( vec1.begin(), vec1.end(), vec2.begin(), valVec->begin(), []( const T& v1, const T& v2 ){ - return v1 * v2; - } ); - return valVec; - } + // template + // std::shared_ptr> vecElemMult( const std::vector& vec1, const std::vector& vec2){ + // if( vec1.size() < vec2.size() ){ return vecElemMult( vec2, vec1 ); } + // auto valVec = std::make_shared>( vec1.size() ); + // std::transform( vec1.begin(), vec1.end(), vec2.begin(), valVec->begin(), []( const T& v1, const T& v2 ){ + // return v1 * v2; + // } ); + // return valVec; + // } // ZW: bool struct to define which double values // to extract transposed from LHE file diff --git a/tools/REX/REX.h b/tools/REX/REX.h 
index 4130f53a6f..4f4adcf5b4 100644 --- a/tools/REX/REX.h +++ b/tools/REX/REX.h @@ -59,8 +59,24 @@ namespace REX std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort); extern template std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort); + std::shared_ptr> nuFindEach( std::string_view textFile, std::string_view searchTerm ); + std::shared_ptr> nuLineSplitter( std::string_view currEvt ); std::shared_ptr> nuWordSplitter( std::string_view line ); - + std::shared_ptr> nuBlankSplitter( std::string_view currEvt ); + std::shared_ptr filePuller( const std::string& fileLoc ); + bool filePusher( std::string fileLoc, std::string fileCont ); + + // ZW: templated fcn for multiplying two vectors elementwise, + // assuming T has a multiplication operator* + template + std::shared_ptr> vecElemMult( const std::vector& vec1, const std::vector& vec2){ + if( vec1.size() < vec2.size() ){ return vecElemMult( vec2, vec1 ); } + auto valVec = std::make_shared>( vec1.size() ); + std::transform( vec1.begin(), vec1.end(), vec2.begin(), valVec->begin(), []( const T& v1, const T& v2 ){ + return v1 * v2; + } ); + return valVec; + } struct xmlTree; // ZW: struct for handling tags in XML node opening tags @@ -863,12 +879,6 @@ std::shared_ptr>>> lheValDoubles std::shared_ptr>>> lheValDoubles(transLHE& lheAOS, lheRetDs vals = lheRetDs() ); -// struct lhePrt; -// struct xmlNode; -// struct event : public xmlNode; -// event& makeEv( std::vector>& particles ); -// std::vector> getParticles( event& ev ); -// struct eventComp; } #endif diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc index e81769c600..6c19432978 100644 --- a/tools/REX/teawREX.cc +++ b/tools/REX/teawREX.cc @@ -25,7 +25,8 @@ #include #include #include -#include "REX.cc" +//#include "REX.cc" +#include "REX.h" #include "teawREX.h" namespace REX::teaw @@ -248,12 +249,12 @@ namespace REX::teaw auto possNamePos = srcCard.find_first_of( "-\n#", lnchPos[k] ); if( 
srcCard[possNamePos] == '-' ){ auto endLine = srcCard.find( "\n", possNamePos ); - auto opts = srcCard.substr( possNamePos, endLine - possNamePos ); - rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.push_back( opts ); - auto namePos = opts.find( "rwgt_name" ); + auto locOpts = srcCard.substr( possNamePos, endLine - possNamePos ); + rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.push_back( locOpts ); + auto namePos = locOpts.find( "rwgt_name" ); if( namePos != REX::npos ){ - auto endName = opts.find_first_of( " \n\r\f\t\v", namePos ); - rwgtNames->push_back( std::string( opts.substr( namePos + 9, endName - namePos - 9 ) ) ); + auto endName = locOpts.find_first_of( " \n\r\f\t\v", namePos ); + rwgtNames->push_back( std::string( locOpts.substr( namePos + 9, endName - namePos - 9 ) ) ); } else { rwgtNames->push_back( "rwgt_" + std::to_string( k + 1 ) ); } @@ -754,10 +755,10 @@ namespace REX::teaw double xErr = std::stod(std::string(xSecLines[0]->xerrup)); for( size_t k = 0 ; k < reWgts->size() ; ++k ){ double xSecCurr = normXSecs->at(k); - auto wgts = reWgts->at(k); + auto locWgts = reWgts->at(k); double omega = 0.0; double omegaSqr = 0.0; - for( auto wgt : *wgts ){ + for( auto wgt : *locWgts ){ double invWgt = 1. 
/ wgt; omega += invWgt; omegaSqr += invWgt * invWgt; @@ -785,9 +786,11 @@ namespace REX::teaw } std::shared_ptr> rwgtRunner::getReXSecs(){ if(this->calcXSecs()){ return normXSecs; } + return nullptr; } std::shared_ptr> rwgtRunner::getReXErrs(){ if(this->calcXErrs()){ return errXSecs; } + return nullptr; } void rwgtRun( rwgtRunner& rwgt, const std::string& path ){ From 4f68de824cc054d1c73bb922648b0b44a6d55d2c Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Tue, 17 Sep 2024 16:23:05 +0200 Subject: [PATCH 29/76] removed legacy comments --- tools/REX/REX.cc | 28 ----------- tools/REX/rwgt_driver.cc | 21 +------- tools/REX/rwgt_instance.cc | 10 ---- tools/REX/rwgt_runner.cc | 2 - tools/REX/teawREX.cc | 100 +------------------------------------ 5 files changed, 4 insertions(+), 157 deletions(-) diff --git a/tools/REX/REX.cc b/tools/REX/REX.cc index 885623ce2e..817763619b 100644 --- a/tools/REX/REX.cc +++ b/tools/REX/REX.cc @@ -45,8 +45,6 @@ namespace REX { -// using sortFcn = std::function>(std::vector)>; -// using statSort = std::function>(std::string_view, std::vector)>; // ZW: index sorting function, which returs vector // of the indices of the original vector sorted @@ -248,16 +246,6 @@ namespace REX return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); } - // template - // bool clStringComp( const Str1& org, const Str2& comp ){ - // return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), - // []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); - // } - // template - // bool clStringComp( const Str1Pt& orgStrt, const Str1Pt& orgEnd, const Str2& comp ){ - // return std::equal( orgStrt, orgEnd, comp.begin(), comp.end(), - // []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); - // } // ZW: templated fcn for finding a caseless substring searchTerm in srcFile // On failure to find searchTerm, returns 
REX::npos @@ -628,8 +616,6 @@ namespace REX } void xmlNode::endWriter() { if( isFaux() ){ return; } - //auto endSt = xmlFile.find_last_of("<", end); - //nodeEnd = xmlFile.substr( endSt, end - endSt ); nodeEnd = "\n"; } void xmlNode::contWriter() { @@ -2258,8 +2244,6 @@ namespace REX lheNode::lheNode() : xmlNode(){} lheNode::lheNode( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) : xmlNode(originFile, begin, childs){ - //xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); - //if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} for( auto child : children ){ if( child->getName() == "header" ){ header = std::make_shared( *child ); continue; } if( child->getName() == "init" ){ init = std::make_shared( *child, true ); continue; } @@ -3467,18 +3451,6 @@ namespace REX return valVec; } - // ZW: templated fcn for multiplying two vectors elementwise, - // assuming T has a multiplication operator* - // template - // std::shared_ptr> vecElemMult( const std::vector& vec1, const std::vector& vec2){ - // if( vec1.size() < vec2.size() ){ return vecElemMult( vec2, vec1 ); } - // auto valVec = std::make_shared>( vec1.size() ); - // std::transform( vec1.begin(), vec1.end(), vec2.begin(), valVec->begin(), []( const T& v1, const T& v2 ){ - // return v1 * v2; - // } ); - // return valVec; - // } - // ZW: bool struct to define which double values // to extract transposed from LHE file std::vector lheRetDs::getBools(){ diff --git a/tools/REX/rwgt_driver.cc b/tools/REX/rwgt_driver.cc index 555d21c7ee..7fe6a2f8a3 100644 --- a/tools/REX/rwgt_driver.cc +++ b/tools/REX/rwgt_driver.cc @@ -37,7 +37,6 @@ void writeRwgtCsv( std::string path, std::shared_ptr> n throw std::runtime_error( "Failed to open output file for writing." 
); if( names->size() != xSecs->size() || names->size() != errXSecs->size() ) throw std::runtime_error( "Mismatch in number of processes, cross-sections, and errors when logging results." ); - //outFile << "Process, Cross-Section, Error\n"; for( size_t k = 0 ; k < names->size() ; ++k ) { outFile << names->at(k) << ", " << xSecs->at(k) << ", " << errXSecs->at(k) << "\n"; @@ -56,7 +55,7 @@ int main( int argc, char** argv ){ if (argc < 2){ return usage( argv[0] ); } - // READ COMMAND LINE ARGUMENTS + for( int i = 1; i < argc; i++ ) { auto currArg = std::string( argv[i] ); @@ -77,7 +76,6 @@ int main( int argc, char** argv ){ } } - if( lheFilePath.empty() || rwgtCardPath.empty() ){ return usage( argv[0] ); } @@ -112,10 +110,8 @@ int main( int argc, char** argv ){ static REX::teaw::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); static std::vector runSet = {%(run_set)s}; -// std::vector runSet; static REX::transSkel loadEvs = fileCol.initCards( runSet ); fileCol.initDoubles(); -// static std::vector&, unsigned int )>> fBridgeConstr; static std::vector fBridgeVec = {%(fbridge_vec)s}; static std::vector bridges; static std::vector amps; @@ -130,15 +126,7 @@ int main( int argc, char** argv ){ amps.push_back( currAmp ); ++relSet; } - // REX::teaw::ampCall subProcSet; - - // for( auto proc : runSet ){ - // subProcSet.insert( REX::teaw::ampPair( proc.procEventInt, proc.bridgeCall ) ); - // } - //auto bridgeCont = fbridgeRunner( fileCol.getLhe() ); - - //std::function>( std::vector&, std::vector& )> scatteringAmplitude = bridgeCont.scatAmp; REX::teaw::rwgtRunner driver( fileCol, amps ); driver.runRwgt( outputPath ); @@ -146,12 +134,7 @@ int main( int argc, char** argv ){ auto rwgt_names = driver.getNames(); auto rwgt_xSecs = driver.getReXSecs(); auto rwgt_errXSecs = driver.getReXErrs(); - // for( size_t k = 0 ; k < rwgt_names->size() ; ++k ) - // { - // std::cout << "Process: " << rwgt_names->at(k) << "\n"; - // std::cout << "Cross-Section: " << rwgt_xSecs->at(k) << " 
+/- " << rwgt_errXSecs->at(k) << "\n"; - // } - + writeRwgtCsv( "rwgt_results.csv", rwgt_names, rwgt_xSecs, rwgt_errXSecs ); return 0; diff --git a/tools/REX/rwgt_instance.cc b/tools/REX/rwgt_instance.cc index 4934b6d171..b22d1ee2a7 100644 --- a/tools/REX/rwgt_instance.cc +++ b/tools/REX/rwgt_instance.cc @@ -28,10 +28,6 @@ namespace rwgt{ void warpPad( std::vector& input, unsigned int nWarp = 32 ){ auto nEvt = input.size(); auto nWarpRemain = warpRemain( nEvt, nWarp ); -// auto fauxNEvt = nEvt + nWarpRemain; -// auto output = std::vector( fauxNEvt ); -// std::copy( input.begin(), input.end(), output.begin()); -// input.resize( fauxNEvt ); input.reserve( nEvt + nWarpRemain ); for( size_t k = nEvt - nWarpRemain ; k < nEvt ; ++k ){ input.push_back( input[k] ); @@ -135,12 +131,6 @@ namespace rwgt{ } std::shared_ptr> fBridge::bridgeCall( std::vector& momenta, std::vector& alphaS ){ if(this->nEvt == 0) this->bridgeSetup( alphaS ); - // for( auto j = 0 ; j < nWarpRemain ; ++j ){ - // alphaS.push_back( 0. ); - // for( auto k = 0 ; k < nMom * nPar ; ++k ){ - // momenta.push_back( 0. 
); - // } - // } if( this->bridge == nullptr) throw std::runtime_error("fBridge object not defined."); warpPad( alphaS, nWarp ); warpPad( momenta, nWarp * nPar * nMom ); diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index ffbc17b38f..f7ad7e74a4 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -47,8 +47,6 @@ namespace %(process_namespace)s{ std::shared_ptr> procSort( std::string_view status, std::vector arguments, size_t index ){ std::vector> initPrts = {%(init_prt_ids)s}; std::vector> finPrts = {%(fin_prt_ids)s}; -// std::vector initPrts = {"-1"}; -// std::vector finPrts = {"1"}; std::shared_ptr> refOrder; if( index == REX::npos ){ if( status == "-1" ){ diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc index 6c19432978..147b09de98 100644 --- a/tools/REX/teawREX.cc +++ b/tools/REX/teawREX.cc @@ -166,64 +166,6 @@ namespace REX::teaw } std::string_view rwgtProc::comRunProc(){ return procString; } - // void rwgtCard::parse( bool parseOnline ) { - // auto strt = srcCard.find("launch"); - // auto commPos = srcCard.find_last_of("#", strt); - // while( commPos > srcCard.find_last_of("\n", strt) ){ - // if( commPos == REX::npos ){ - // break; - // } - // strt = srcCard.find("launch", strt + 6 ); - // } - // while( auto chPos = srcCard.find( "set" ) < strt ){ - // if( srcCard.find_last_of("#", chPos) > srcCard.find_last_of("\n", chPos) ){ chPos = srcCard.find("change", strt + 6 ); continue; } - // opts.push_back( srcCard.substr( chPos, srcCard.find("\n", chPos) - chPos ) ); - // } - // std::vector lnchPos({strt}); - // auto nuLnch = srcCard.find( "launch", strt + 6 ); - // while ( nuLnch != std::string_view::npos ) - // { - // if( srcCard.find_last_of("#", nuLnch) < srcCard.find_last_of("\n", nuLnch) ){ lnchPos.push_back(nuLnch); } - // nuLnch = srcCard.find( "launch", nuLnch + 6 ); - // } - // for( size_t k = 0 ; k < lnchPos.size() - 1 ; ++k ) - // { - // auto strtLi = srcCard.find( "set", lnchPos[k] ); - // 
rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( strtLi, lnchPos[k+1] - strtLi ), parseOnline ) ); - // if( srcCard.find( "--", lnchPos[k] ) < strtLi ){ - // auto strtPos = srcCard.find( "--", lnchPos[k] ); - // while( (strtPos < strtLi ) && (strtPos!= std::string_view::npos) ){ - // auto nuStrtPos = std::min( srcCard.find( "\n", strtPos ), srcCard.find( "--", strtPos + 1 )); - // rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.push_back( srcCard.substr( strtPos, nuStrtPos - strtPos ) ); - // if( rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr(2,11) == "rwgt_name"){ - // rwgtRuns[ rwgtRuns.size() - 1 ].rwgtName = rwgtRuns[ rwgtRuns.size() - 1 ]. - // rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr( 11, nuStrtPos - strtPos - 11 ); - // } - // if( nuStrtPos == srcCard.find( "\n", strtPos ) ){ break; } - // strtPos = nuStrtPos; - // } - // } - // } - // size_t endLi = srcCard.find( "\n", lnchPos[ lnchPos.size() - 1 ] ); - // if( srcCard.substr( endLi + 1, 3 ) == "set" ){ - // while( srcCard.substr( endLi + 1, 3 ) == "set" ) - // { - // endLi = srcCard.find( "\n", endLi + 1 ); - // } - // rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( lnchPos[lnchPos.size()-1], endLi - lnchPos[lnchPos.size()-1] ), parseOnline ) ); - // } - // rwgtProcs = std::vector(); rwgtProcs.reserve( rwgtRuns.size() ); - // rwgtNames->reserve( rwgtRuns.size() ); - // int p = 1; - // for( auto run : rwgtRuns ){ - // rwgtProcs.push_back( run.comRunProc() ); - // if( run.rwgtName == "" ){ - // rwgtNames->push_back( "rwgt_" + std::to_string( p++ ) ); - // } else { - // rwgtNames->push_back( std::string(run.rwgtName) ); - // } - // } - // } void rwgtCard::parse( bool parseOnline ){ auto allLaunchPos = REX::nuFindEach( this->srcCard, "launch" ); std::vector lnchPos; @@ -319,7 +261,6 @@ namespace REX::teaw } void rwgtCollection::setLhe( std::string_view lhe_file ){ if( lheFileSet ){ return; } - //lheFile = 
REX::lheParser( lhe_file, strt, post ); lheFile = std::make_shared( REX::lheNode(lhe_file) ); lheFileSet = true; } @@ -370,7 +311,6 @@ namespace REX::teaw auto vecOfVecs = REX::lheValDoubles( eventFile, returnBools ); if( vecOfVecs->size() != 3 * eventFile.subProcs.size() ) throw std::runtime_error( "Incorrect number of parameters have been extracted from the LHE file." ); - //wgts[0] = vecOfVecs->at( 0 ); gS[0] = vecOfVecs->at( 1 ); momenta[0] = vecOfVecs->at( 2 ); for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) { wgts.push_back( vecOfVecs->at( 3*k ) ); @@ -478,11 +418,7 @@ namespace REX::teaw void rwgtRunner::setMeEval( amplitude eval ){ meEval = eval; meInit = true; -// ampCall nuEvals; -// nuEvals.insert( std::pair( *eventFile.subProcs[0]->process, eval ) ); -// meEvals = nuEvals; } -// void rwgtRunner::setMeEvals( ampCall evals ){ meEvals = evals; meCompInit = true; } void rwgtRunner::addMeEval( const REX::event& ev, const amplitude& eval ){}// meEvals.insert( std::pair( ev, eval ) ); meCompInit = true; } rwgtRunner::rwgtRunner() : rwgtFiles(){ return; } rwgtRunner::rwgtRunner( rwgtFiles& rwgts ) : rwgtFiles( rwgts ){ return; } @@ -490,10 +426,6 @@ namespace REX::teaw meEval = meCalc; meInit = true; } - // rwgtRunner::rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ) : rwgtFiles( rwgts ){ - // meEvals = meCalcs; - // meCompInit = true; - // } rwgtRunner::rwgtRunner( rwgtFiles& rwgts, std::vector& meCalcs ) : rwgtFiles( rwgts ){ meVec = meCalcs; meCompInit = true; @@ -519,11 +451,6 @@ namespace REX::teaw this->ampNorm = rwgts.ampNorm; this->reWgts = rwgts.reWgts; } - // rwgtRunner::rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, - // ampCall meCalcs ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ - // meEvals = meCalcs; - // meCompInit = true; - // } bool rwgtRunner::oneME(){ return (meInit != meCompInit); } bool rwgtRunner::singAmp(){ return (meInit && !meCompInit); } template @@ -543,14 +470,8 
@@ namespace REX::teaw } } else{ - // for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) - // { - // auto ins = meEvals[*(eventFile.subProcs[k]->process)]( *(momenta[k]), *(gS[k]) ); - // initMEs.push_back( std::make_shared>( ins->begin(), ins->begin() + wgts[k]->size() ) ); - // } - } - //auto ins = meEval( *(momenta[0]), *(gS[0]) ); - //initMEs = {std::make_shared>( ins->begin(), ins->begin() + wgts[0]->size() )}; + // DO NOT ALLOW FOR SINGLE ME WITHOUT PASSING EVERYTHING THROUGH VECTOR + } meSet = true; } bool rwgtRunner::setParamCard( std::shared_ptr slhaParams ){ @@ -563,8 +484,6 @@ namespace REX::teaw return true; } void rwgtRunner::setNormWgtsSingleME(){ - //if( initMEs->size() != wgts[0]->size() ) - // throw std::runtime_error( "Inconsistent number of events and event weights." ); meNormWgts = {std::make_shared>( wgts[0]->size() )}; for( size_t k = 0; k < initMEs[0]->size(); k++ ){ meNormWgts[0]->at( k ) = wgts[0]->at( k ) / initMEs[0]->at( k ); @@ -613,8 +532,6 @@ namespace REX::teaw template void rwgtRunner::setNormWgts(Args&&... args){ if( !oneME() ){ setMEs(args...); } - //if( initMEs->size() != wgts[0]->size() ) - // throw std::runtime_error( "Inconsistent number of events and event weights." ); for( size_t k = 0; k < initMEs.size() ; ++k ){ if( initMEs[k]->size() != wgts[k]->size() ) throw std::runtime_error( "Inconsistent number of events and event weights." ); @@ -635,10 +552,6 @@ namespace REX::teaw } else{ std::vector>> nuMEs = {}; - // for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) - // { - // nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); - // } std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); newWGTs = REX::vecElemMult( *newMEs, *normWgt ); } @@ -678,7 +591,6 @@ namespace REX::teaw throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." 
); if( !setParamCard( slhaParams ) ) throw std::runtime_error( "Failed to rewrite parameter card." ); - //auto newMEs = meEval( *momenta, *gS ); std::shared_ptr> newWGTs; if( singAmp() ){ auto newMEs = meEval( *momenta[0], *gS[0] ); @@ -686,10 +598,6 @@ namespace REX::teaw } else{ std::vector>> nuMEs = {}; - // for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) - // { - // nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); - // } std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); newWGTs = REX::vecElemMult( *newMEs, *normWgt ); } @@ -712,10 +620,6 @@ namespace REX::teaw } else{ std::vector>> nuMEs = {}; - // for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) - // { - // nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); - // } std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); newWGTs = REX::vecElemMult( *newMEs, *normWgt ); } From 55fdda981da70a1a4f565e32943f37874e72ac73 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 23 Sep 2024 11:40:46 +0200 Subject: [PATCH 30/76] removed legacy code and files, renamed some functions to make naming clearer --- tools/REX/10simevs.lhe | 550 ----- tools/REX/2diffevs.lhe | 400 ---- tools/REX/2simevs.lhe | 407 ---- tools/REX/REX.cc | 200 +- tools/REX/REX.h | 10 +- tools/REX/REX.hpp | 3693 --------------------------------- tools/REX/pepper.cu | 169 -- tools/REX/rwgt_runner_copy.cc | 197 -- tools/REX/teawREX.cc | 20 +- tools/REX/teawREX.hpp | 554 ----- tools/REX/tester.cpp | 77 - 11 files changed, 39 insertions(+), 6238 deletions(-) delete mode 100644 tools/REX/10simevs.lhe delete mode 100644 tools/REX/2diffevs.lhe delete mode 100644 tools/REX/2simevs.lhe delete mode 100644 tools/REX/REX.hpp delete mode 100644 tools/REX/pepper.cu delete mode 100644 tools/REX/rwgt_runner_copy.cc delete mode 100644 tools/REX/teawREX.hpp delete mode 100644 tools/REX/tester.cpp diff --git a/tools/REX/10simevs.lhe b/tools/REX/10simevs.lhe deleted file 
mode 100644 index 23432065d5..0000000000 --- a/tools/REX/10simevs.lhe +++ /dev/null @@ -1,550 +0,0 @@ - -
- - -3.5.2 - - - t t~ > w+ b w- b~ -output -]]> - - -#********************************************************************* -# MadGraph/MadEvent * -# http://madgraph.hep.uiuc.edu * -# * -# proc_card.dat * -#********************************************************************* -# * -# This Files is generated by MADGRAPH 5 * -# * -# WARNING: This Files is generated for MADEVENT (compatibility issue)* -# This files is NOT a valid MG4 proc_card.dat * -# Running this in MG4 will NEVER reproduce the result of MG5* -# * -#********************************************************************* -#********************************************************************* -# Process(es) requested : mg2 input * -#********************************************************************* -# Begin PROCESS # This is TAG. Do not modify this line -g g > t t~ > w+ b w- b~ #Process -# Be carefull the coupling are here in MG5 convention - -end_coup # End the couplings input - -done # this tells MG there are no more procs -# End PROCESS # This is TAG. Do not modify this line -#********************************************************************* -# Model information * -#********************************************************************* -# Begin MODEL # This is TAG. Do not modify this line -sm -# End MODEL # This is TAG. Do not modify this line -#********************************************************************* -# Start multiparticle definitions * -#********************************************************************* -# Begin MULTIPARTICLES # This is TAG. Do not modify this line - -# End MULTIPARTICLES # This is TAG. Do not modify this line - - - - - -###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### -###################################################################### -## ## -## Width set on Auto will be computed following the information ## -## present in the decay.py files of the model. 
## -## See arXiv:1402.1178 for more details. ## -## ## -###################################################################### - -################################### -## INFORMATION FOR MASS -################################### -Block mass - 5 4.700000e+00 # MB - 6 1.730000e+02 # MT - 15 1.777000e+00 # MTA - 23 9.118800e+01 # MZ - 25 1.250000e+02 # MH -## Dependent parameters, given by model restrictions. -## Those values should be edited following the -## analytical expression. MG5 ignores those values -## but they are important for interfacing the output of MG5 -## to external program such as Pythia. - 1 0.000000e+00 # d : 0.0 - 2 0.000000e+00 # u : 0.0 - 3 0.000000e+00 # s : 0.0 - 4 0.000000e+00 # c : 0.0 - 11 0.000000e+00 # e- : 0.0 - 12 0.000000e+00 # ve : 0.0 - 13 0.000000e+00 # mu- : 0.0 - 14 0.000000e+00 # vm : 0.0 - 16 0.000000e+00 # vt : 0.0 - 21 0.000000e+00 # g : 0.0 - 22 0.000000e+00 # a : 0.0 - 24 8.041900e+01 # w+ : cmath.sqrt(MZ__exp__2/2. + cmath.sqrt(MZ__exp__4/4. - (aEW*cmath.pi*MZ__exp__2)/(Gf*sqrt__2))) - -################################### -## INFORMATION FOR SMINPUTS -################################### -Block sminputs - 1 1.325070e+02 # aEWM1 - 2 1.166390e-05 # Gf - 3 1.180000e-01 # aS (Note that Parameter not used if you use a PDF set) - -################################### -## INFORMATION FOR YUKAWA -################################### -Block yukawa - 5 4.700000e+00 # ymb - 6 1.730000e+02 # ymt - 15 1.777000e+00 # ymtau - -################################### -## INFORMATION FOR DECAY -################################### -DECAY 6 1.491500e+00 # WT -DECAY 23 2.441404e+00 # WZ -DECAY 24 2.047600e+00 # WW -DECAY 25 6.382339e-03 # WH -## Dependent parameters, given by model restrictions. -## Those values should be edited following the -## analytical expression. MG5 ignores those values -## but they are important for interfacing the output of MG5 -## to external program such as Pythia. 
-DECAY 1 0.000000e+00 # d : 0.0 -DECAY 2 0.000000e+00 # u : 0.0 -DECAY 3 0.000000e+00 # s : 0.0 -DECAY 4 0.000000e+00 # c : 0.0 -DECAY 5 0.000000e+00 # b : 0.0 -DECAY 11 0.000000e+00 # e- : 0.0 -DECAY 12 0.000000e+00 # ve : 0.0 -DECAY 13 0.000000e+00 # mu- : 0.0 -DECAY 14 0.000000e+00 # vm : 0.0 -DECAY 15 0.000000e+00 # ta- : 0.0 -DECAY 16 0.000000e+00 # vt : 0.0 -DECAY 21 0.000000e+00 # g : 0.0 -DECAY 22 0.000000e+00 # a : 0.0 - - -# Number of Events : 10 -# Integrated weight (pb) : 439.19338 - -
- -2212 2212 6.500000e+03 6.500000e+03 0 0 247000 247000 -4 1 -4.391934e+02 3.661122e+00 4.391934e+02 1 -please cite 1405.0301 - - - 8 1 +4.3919338e+02 2.18409400e+02 7.54677100e-03 1.13637100e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.5753848612e+02 1.5753848612e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -4.2590181999e+02 4.2590181999e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 6 2 1 2 501 0 +1.2998184096e+02 -6.4884027876e+00 -2.9537542099e+02 3.6336687781e+02 1.6688759754e+02 0.0000e+00 0.0000e+00 - -6 2 1 2 0 502 -1.2998184096e+02 +6.4884027876e+00 +2.7012087117e+01 2.2007342830e+02 1.7540034961e+02 0.0000e+00 0.0000e+00 - 24 1 3 3 0 0 +5.0317013823e+00 -2.3598693140e+01 -1.3935351491e+02 1.6269245345e+02 8.0419002446e+01 0.0000e+00 -1.0000e+00 - 5 1 3 3 501 0 +1.2495013958e+02 +1.7110290353e+01 -1.5602190608e+02 2.0067442436e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 - -24 1 4 4 0 0 -2.8600069986e+01 +2.4574536812e+01 -3.9691506658e+01 9.7285679922e+01 8.0419002446e+01 0.0000e+00 -1.0000e+00 - -5 1 4 4 0 502 -1.0138177097e+02 -1.8086134024e+01 +6.6703593775e+01 1.2278774838e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 - - 2 0.21669541E+03 -0 - 1 21 0.24236690E-01 0.21840939E+03 - 1 21 0.65523357E-01 0.21840939E+03 - 0.33953413E+04 - - - - 8 1 +4.3919338e+02 3.15887700e+02 7.54677100e-03 1.07761700e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +5.3238340901e+02 5.3238340901e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -2.1052403255e+02 2.1052403255e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -6 2 1 2 0 503 +6.6382935030e+01 +2.5548643690e+02 +3.7219413376e+01 3.1807282501e+02 1.7350799341e+02 0.0000e+00 0.0000e+00 - 6 2 1 2 501 0 -6.6382935030e+01 -2.5548643690e+02 +2.8463996308e+02 4.2483461655e+02 1.7258194961e+02 0.0000e+00 0.0000e+00 - 24 1 4 4 0 0 +1.6661005347e-01 -1.8051085807e+02 +2.4998404651e+02 
3.1865880986e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 - 5 1 4 4 501 0 -6.6549545084e+01 -7.4975578835e+01 +3.4655916570e+01 1.0617580669e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 - -24 1 3 3 0 0 +7.2402758048e+01 +1.6925425433e+02 -3.6729050251e+01 2.0421900271e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 - -5 1 3 3 0 503 -6.0198230171e+00 +8.6232182571e+01 +7.3948463627e+01 1.1385382230e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 - - 2 0.31576070E+03 -0 - 1 21 0.81905139E-01 0.31588770E+03 - 1 21 0.32388313E-01 0.31588770E+03 - 0.11189986E+04 - - - - 7 1 +4.3919338e+02 2.51159400e+02 7.54677100e-03 1.11876800e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +8.2851295259e+01 8.2851295259e+01 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -2.1984564692e+03 2.1984564692e+03 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -6 2 1 2 0 502 -7.7080486205e+01 -1.0172807019e+02 -7.8449770191e+01 2.2872812745e+02 1.7283572612e+02 0.0000e+00 0.0000e+00 - 24 1 1 2 0 0 +5.9800337718e+00 +7.0764350150e+01 -4.1759378302e+02 4.3115558402e+02 8.0419002446e+01 0.0000e+00 1.0000e+00 - 5 1 1 2 501 0 +7.1100452433e+01 +3.0963720041e+01 -1.6195616207e+03 1.6214240530e+03 4.7000000000e+00 0.0000e+00 -1.0000e+00 - -24 1 3 3 0 0 -4.7255780753e+01 +1.4691445127e+01 -3.8855826763e+01 1.0210748553e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 - -5 1 3 3 0 502 -2.9824705452e+01 -1.1641951532e+02 -3.9593943428e+01 1.2662064191e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 - - 2 0.24154488E+03 -0 - 1 21 0.12746352E-01 0.25115937E+03 - 1 21 0.33822410E+00 0.25115937E+03 - 0.10034989E+03 - - - - 8 1 +4.3919338e+02 1.78714900e+02 7.54677100e-03 1.16958300e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +4.3091677531e+02 4.3091677531e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -1.0391889102e+02 1.0391889102e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -6 2 1 2 0 503 
-3.6067364068e+01 -1.4649928734e+01 +1.7471337281e+01 1.7742037393e+02 1.7221285172e+02 0.0000e+00 0.0000e+00 - 6 2 1 2 501 0 +3.6067364068e+01 +1.4649928734e+01 +3.0952654701e+02 3.5741529240e+02 1.7442342955e+02 0.0000e+00 0.0000e+00 - 24 1 4 4 0 0 +2.2734065795e+01 +3.3518972368e+01 +6.3307765789e+01 1.1007019769e+02 8.0419002446e+01 0.0000e+00 1.0000e+00 - 5 1 4 4 501 0 +1.3333298273e+01 -1.8869043633e+01 +2.4621878122e+02 2.4734509471e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 - -24 1 3 3 0 0 +3.4800320870e+01 +1.3510658921e+01 +4.0711502437e+01 9.7561481242e+01 8.0419002446e+01 0.0000e+00 0.0000e+00 - -5 1 3 3 0 503 -7.0867684937e+01 -2.8160587655e+01 -2.3240165156e+01 7.9858892692e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 - - 2 0.17817321E+03 -0 - 1 21 0.66294888E-01 0.17871488E+03 - 1 21 0.15987522E-01 0.17871488E+03 - 0.73335693E+04 - - - - 8 1 +4.3919338e+02 2.04872300e+02 7.54677100e-03 1.14579500e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +3.7895944857e+02 3.7895944857e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -4.5403273068e+02 4.5403273068e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 6 2 1 2 501 0 -9.4158773160e+01 +4.2436191949e+01 -3.9984076151e+02 4.4927194413e+02 1.7693484753e+02 0.0000e+00 0.0000e+00 - -6 2 1 2 0 502 +9.4158773160e+01 -4.2436191949e+01 +3.2476747940e+02 3.8372023512e+02 1.7635361696e+02 0.0000e+00 0.0000e+00 - 24 1 3 3 0 0 -1.0155138529e+02 +3.1755977097e+01 -4.1062842852e+02 4.3174535111e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 - 5 1 3 3 501 0 +7.3926121347e+00 +1.0680214852e+01 +1.0787667014e+01 1.7526593025e+01 4.7000000000e+00 0.0000e+00 -1.0000e+00 - -24 1 4 4 0 0 +2.8155867650e+01 -1.5152453482e+01 +2.9281756166e+02 3.0533864823e+02 8.0419002446e+01 0.0000e+00 1.0000e+00 - -5 1 4 4 0 502 +6.6002905510e+01 -2.7283738467e+01 +3.1949917737e+01 7.8381586893e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 - - 2 0.20474671E+03 -0 - 1 21 0.58301453E-01 
0.20487227E+03 - 1 21 0.69851190E-01 0.20487227E+03 - 0.50816560E+03 - - - - 8 1 +4.3919338e+02 2.01015900e+02 7.54677100e-03 1.15150100e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +6.0319621128e+02 6.0319621128e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -9.8768974383e+01 9.8768974383e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - -6 2 1 2 0 503 +1.4738675699e+01 +7.6352852286e+01 +4.6116481070e+01 2.0623803560e+02 1.8536561409e+02 0.0000e+00 0.0000e+00 - 6 2 1 2 501 0 -1.4738675699e+01 -7.6352852286e+01 +4.5831075582e+02 4.9572715006e+02 1.7219080054e+02 0.0000e+00 0.0000e+00 - 24 1 4 4 0 0 -4.3843112218e+00 -4.1059533654e+00 +3.8045368848e+02 3.8890655185e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 - 5 1 4 4 501 0 -1.0354364477e+01 -7.2246898921e+01 +7.7857067340e+01 1.0682059821e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 - -24 1 3 3 0 0 +2.7266458026e+01 +1.2364655742e+02 +4.6653600940e+01 1.5708502580e+02 8.0419002446e+01 0.0000e+00 1.0000e+00 - -5 1 3 3 0 503 -1.2527782328e+01 -4.7293705129e+01 -5.3711986978e-01 4.9153009803e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 - - 2 0.19792528E+03 -0 - 1 21 0.92799415E-01 0.20101591E+03 - 1 21 0.15195227E-01 0.20101591E+03 - 0.37319721E+04 - - - - 8 1 +4.3919338e+02 1.74602100e+02 7.54677100e-03 1.17351100e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +9.1248808608e+02 9.1248808608e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -3.9355927787e+01 3.9355927787e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 6 2 1 2 501 0 -2.5269092142e+00 -7.0232873774e+00 +2.4558729161e+02 3.0046206483e+02 1.7294109624e+02 0.0000e+00 0.0000e+00 - -6 2 1 2 0 502 +2.5269092142e+00 +7.0232873774e+00 +6.2754486669e+02 6.5138194904e+02 1.7444246039e+02 0.0000e+00 0.0000e+00 - 24 1 3 3 0 0 +6.0339304741e+01 -1.2037996311e+01 +1.0278716341e+02 1.4428500258e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 - 5 1 3 3 501 0 
-6.2866213955e+01 +5.0147089339e+00 +1.4280012819e+02 1.5617706226e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 - -24 1 4 4 0 0 -2.3438702161e+01 -4.5120986152e+01 +2.2033674694e+02 2.4000161283e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 - -5 1 4 4 0 502 +2.5965611375e+01 +5.2144273529e+01 +4.0720811975e+02 4.1138033621e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 - - 2 0.17422586E+03 -0 - 1 21 0.14038277E+00 0.17460207E+03 - 1 21 0.60547588E-02 0.17460207E+03 - 0.69618335E+04 - - - - 8 1 +4.3919338e+02 1.88007500e+02 7.54677100e-03 1.16157200e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +6.3582462508e+02 6.3582462508e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -1.6129828482e+02 1.6129828482e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -6 2 1 2 0 503 +4.8116344176e+01 -2.9059974330e+01 -8.6672370246e+01 2.0702396972e+02 1.7940780248e+02 0.0000e+00 0.0000e+00 - 6 2 1 2 501 0 -4.8116344176e+01 +2.9059974330e+01 +5.6119871050e+02 5.9009894018e+02 1.7353127052e+02 0.0000e+00 0.0000e+00 - 24 1 4 4 0 0 -1.6998242631e+01 +4.7411725194e+01 +1.5136221811e+02 1.7864643563e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 - 5 1 4 4 501 0 -3.1118101545e+01 -1.8351750864e+01 +4.0983649239e+02 4.1145250455e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 - -24 1 3 3 0 0 +3.9713907948e+01 -3.9675651300e+00 +2.4165830325e+01 9.2973863893e+01 8.0419002446e+01 0.0000e+00 -1.0000e+00 - -5 1 3 3 0 503 +8.4024362278e+00 -2.5092409200e+01 -1.1083820057e+02 1.1405010583e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 - - 2 0.18659178E+03 -0 - 1 21 0.97819172E-01 0.18800751E+03 - 1 21 0.24815121E-01 0.18800751E+03 - 0.13327893E+04 - - - - 8 1 +4.3919338e+02 2.47273800e+02 7.54677100e-03 1.11506100e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +4.7922758970e+02 4.7922758970e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -1.2754705753e+02 1.2754705753e+02 0.0000000000e+00 
0.0000e+00 -1.0000e+00 - 6 2 1 2 501 0 +1.6471793564e+02 +6.3009040540e+01 +1.7155189565e+02 3.0095574765e+02 1.7332677701e+02 0.0000e+00 0.0000e+00 - -6 2 1 2 0 502 -1.6471793564e+02 -6.3009040540e+01 +1.8012863652e+02 3.0581889959e+02 1.7313791075e+02 0.0000e+00 0.0000e+00 - 24 1 3 3 0 0 +5.6303415524e+01 +9.2300656218e+01 +8.8725358462e+01 1.6133471705e+02 8.0419002446e+01 0.0000e+00 -1.0000e+00 - 5 1 3 3 501 0 +1.0841452012e+02 -2.9291615679e+01 +8.2826537186e+01 1.3962103059e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 - -24 1 4 4 0 0 -1.6187054837e+02 -9.3582557312e+01 +1.1528314764e+02 2.3391705698e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 - -5 1 4 4 0 502 -2.8473872739e+00 +3.0573516772e+01 +6.4845488879e+01 7.1901842605e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 - - 2 0.24724066E+03 -0 - 1 21 0.73727321E-01 0.24727375E+03 - 1 21 0.19622624E-01 0.24727375E+03 - 0.38478950E+04 - - - - 8 1 +4.3919338e+02 1.82375300e+02 7.54677100e-03 1.16561400e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +3.2305192784e+02 3.2305192784e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -1.3439332851e+02 1.3439332851e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - -6 2 1 2 0 503 +4.6712402287e+01 -3.3471733509e+01 -1.6591366530e+01 1.8312847738e+02 1.7308483560e+02 0.0000e+00 0.0000e+00 - 6 2 1 2 501 0 -4.6712402287e+01 +3.3471733509e+01 +2.0524996585e+02 2.7431677897e+02 1.7268393460e+02 0.0000e+00 0.0000e+00 - 24 1 4 4 0 0 -5.0429547514e+01 +6.7234938560e+01 +2.0963658148e+02 2.3974650878e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 - 5 1 4 4 501 0 +3.7171452269e+00 -3.3763205051e+01 -4.3866156262e+00 3.4570270185e+01 4.7000000000e+00 0.0000e+00 -1.0000e+00 - -24 1 3 3 0 0 +2.0594294555e+01 +4.7013575059e+01 +5.4619595756e+00 9.5558621614e+01 8.0419002446e+01 0.0000e+00 0.0000e+00 - -5 1 3 3 0 503 +2.6118107732e+01 -8.0485308568e+01 -2.2053326106e+01 8.7569855767e+01 4.7000000000e+00 0.0000e+00 1.0000e+00 - - 2 
0.18228016E+03 -0 - 1 21 0.49700296E-01 0.18237534E+03 - 1 21 0.20675897E-01 0.18237534E+03 - 0.84041211E+04 - - -
diff --git a/tools/REX/2diffevs.lhe b/tools/REX/2diffevs.lhe deleted file mode 100644 index 634129df21..0000000000 --- a/tools/REX/2diffevs.lhe +++ /dev/null @@ -1,400 +0,0 @@ - -
- - -3.5.2 - - - 3j -output -]]> - - -#********************************************************************* -# MadGraph/MadEvent * -# http://madgraph.hep.uiuc.edu * -# * -# proc_card.dat * -#********************************************************************* -# * -# This Files is generated by MADGRAPH 5 * -# * -# WARNING: This Files is generated for MADEVENT (compatibility issue)* -# This files is NOT a valid MG4 proc_card.dat * -# Running this in MG4 will NEVER reproduce the result of MG5* -# * -#********************************************************************* -#********************************************************************* -# Process(es) requested : mg2 input * -#********************************************************************* -# Begin PROCESS # This is TAG. Do not modify this line -p p > 3j #Process -# Be carefull the coupling are here in MG5 convention - -end_coup # End the couplings input - -done # this tells MG there are no more procs -# End PROCESS # This is TAG. Do not modify this line -#********************************************************************* -# Model information * -#********************************************************************* -# Begin MODEL # This is TAG. Do not modify this line -sm -# End MODEL # This is TAG. Do not modify this line -#********************************************************************* -# Start multiparticle definitions * -#********************************************************************* -# Begin MULTIPARTICLES # This is TAG. Do not modify this line - -# End MULTIPARTICLES # This is TAG. 
Do not modify this line - - - - - -###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 #### -###################################################################### -################################### -## INFORMATION FOR MASS -################################### -BLOCK MASS # - 5 4.700000e+00 # mb - 6 1.730000e+02 # mt - 15 1.777000e+00 # mta - 23 9.118800e+01 # mz - 25 1.250000e+02 # mh - 1 0.000000e+00 # d : 0.0 - 2 0.000000e+00 # u : 0.0 - 3 0.000000e+00 # s : 0.0 - 4 0.000000e+00 # c : 0.0 - 11 0.000000e+00 # e- : 0.0 - 12 0.000000e+00 # ve : 0.0 - 13 0.000000e+00 # mu- : 0.0 - 14 0.000000e+00 # vm : 0.0 - 16 0.000000e+00 # vt : 0.0 - 21 0.000000e+00 # g : 0.0 - 22 0.000000e+00 # a : 0.0 - 24 8.041900e+01 # w+ : cmath.sqrt(mz__exp__2/2. + cmath.sqrt(mz__exp__4/4. - (aew*cmath.pi*mz__exp__2)/(gf*sqrt__2))) -################################### -## INFORMATION FOR SMINPUTS -################################### -BLOCK SMINPUTS # - 1 1.325070e+02 # aewm1 - 2 1.166390e-05 # gf - 3 1.300000e-01 # as (note that parameter not used if you use a pdf set) -################################### -## INFORMATION FOR YUKAWA -################################### -BLOCK YUKAWA # - 5 4.700000e+00 # ymb - 6 1.730000e+02 # ymt - 15 1.777000e+00 # ymtau -################################### -## INFORMATION FOR DECAY -################################### -DECAY 6 1.491500e+00 # wt -DECAY 23 2.441404e+00 # wz -DECAY 24 2.047600e+00 # ww -DECAY 25 6.382339e-03 # wh -DECAY 1 0.000000e+00 # d : 0.0 -DECAY 2 0.000000e+00 # u : 0.0 -DECAY 3 0.000000e+00 # s : 0.0 -DECAY 4 0.000000e+00 # c : 0.0 -DECAY 5 0.000000e+00 # b : 0.0 -DECAY 11 0.000000e+00 # e- : 0.0 -DECAY 12 0.000000e+00 # ve : 0.0 -DECAY 13 0.000000e+00 # mu- : 0.0 -DECAY 14 0.000000e+00 # vm : 0.0 -DECAY 15 0.000000e+00 # ta- : 0.0 -DECAY 16 0.000000e+00 # vt : 0.0 -DECAY 21 0.000000e+00 # g : 0.0 -DECAY 22 0.000000e+00 # a : 0.0 - - -# Number of Events : 100 -# 
Integrated weight (pb) : 66372287.22200001 - -
- -2212 2212 6.500000e+03 6.500000e+03 0 0 247000 247000 -4 1 -6.637229e+07 1.268397e+06 6.637229e+07 1 -please cite 1405.0301 - - - 5 1 +6.6372287e+07 3.25558900e+01 7.54677100e-03 1.57144200e-01 - 21 -1 0 0 505 501 +0.0000000000e+00 +0.0000000000e+00 +2.7974513959e+02 2.7974513959e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 -1 0 0 501 502 -0.0000000000e+00 -0.0000000000e+00 -5.0115268359e+01 5.0115268359e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 503 504 +1.1282244936e+00 +2.2858622638e+01 +3.8461797268e-02 2.2886480698e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 +2.9194898468e+00 -4.2605139346e+01 -2.6389333299e+01 5.0200779193e+01 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 505 503 -4.0477143403e+00 +1.9746516708e+01 +2.5598074273e+02 2.5677314806e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - - 3 0.32555892E+02 -0 - 1 21 0.43037713E-01 0.32555892E+02 - 1 21 0.77100414E-02 0.32555892E+02 - 0.65037882E+05 - - - - 5 1 +6.6372287e+07 3.05908400e+01 7.54677100e-03 1.59164800e-01 - 2 -1 0 0 503 0 -0.0000000000e+00 +0.0000000000e+00 +4.5729905700e+02 4.5729905700e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 501 502 +0.0000000000e+00 -0.0000000000e+00 -6.3253912877e+02 6.3253912877e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 21 1 1 2 503 504 +1.1420284484e+00 +2.8694844708e+01 +1.2159916921e+02 1.2494421273e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 1 1 2 504 502 -2.2459074491e+01 -2.0815319355e+01 -6.3010778840e+02 6.3085141876e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 2 1 1 2 501 0 +2.1317046043e+01 -7.8795253530e+00 +3.3326854742e+02 3.3404255428e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - - 3 0.30590836E+02 -0 - 1 21 0.97313711E-01 0.30590836E+02 - 1 2 0.70353702E-01 0.30590836E+02 - 0.91658669E+02 - - -
diff --git a/tools/REX/2simevs.lhe b/tools/REX/2simevs.lhe deleted file mode 100644 index 6fb52dd403..0000000000 --- a/tools/REX/2simevs.lhe +++ /dev/null @@ -1,407 +0,0 @@ - -
- - -3.5.2 - - - t t~ > w+ b w- b~ -output -]]> - - -#********************************************************************* -# MadGraph/MadEvent * -# http://madgraph.hep.uiuc.edu * -# * -# proc_card.dat * -#********************************************************************* -# * -# This Files is generated by MADGRAPH 5 * -# * -# WARNING: This Files is generated for MADEVENT (compatibility issue)* -# This files is NOT a valid MG4 proc_card.dat * -# Running this in MG4 will NEVER reproduce the result of MG5* -# * -#********************************************************************* -#********************************************************************* -# Process(es) requested : mg2 input * -#********************************************************************* -# Begin PROCESS # This is TAG. Do not modify this line -g g > t t~ > w+ b w- b~ #Process -# Be carefull the coupling are here in MG5 convention - -end_coup # End the couplings input - -done # this tells MG there are no more procs -# End PROCESS # This is TAG. Do not modify this line -#********************************************************************* -# Model information * -#********************************************************************* -# Begin MODEL # This is TAG. Do not modify this line -sm -# End MODEL # This is TAG. Do not modify this line -#********************************************************************* -# Start multiparticle definitions * -#********************************************************************* -# Begin MULTIPARTICLES # This is TAG. Do not modify this line - -# End MULTIPARTICLES # This is TAG. Do not modify this line - - - - - -###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### -###################################################################### -## ## -## Width set on Auto will be computed following the information ## -## present in the decay.py files of the model. 
## -## See arXiv:1402.1178 for more details. ## -## ## -###################################################################### - -################################### -## INFORMATION FOR MASS -################################### -Block mass - 5 4.700000e+00 # MB - 6 1.730000e+02 # MT - 15 1.777000e+00 # MTA - 23 9.118800e+01 # MZ - 25 1.250000e+02 # MH -## Dependent parameters, given by model restrictions. -## Those values should be edited following the -## analytical expression. MG5 ignores those values -## but they are important for interfacing the output of MG5 -## to external program such as Pythia. - 1 0.000000e+00 # d : 0.0 - 2 0.000000e+00 # u : 0.0 - 3 0.000000e+00 # s : 0.0 - 4 0.000000e+00 # c : 0.0 - 11 0.000000e+00 # e- : 0.0 - 12 0.000000e+00 # ve : 0.0 - 13 0.000000e+00 # mu- : 0.0 - 14 0.000000e+00 # vm : 0.0 - 16 0.000000e+00 # vt : 0.0 - 21 0.000000e+00 # g : 0.0 - 22 0.000000e+00 # a : 0.0 - 24 8.041900e+01 # w+ : cmath.sqrt(MZ__exp__2/2. + cmath.sqrt(MZ__exp__4/4. - (aEW*cmath.pi*MZ__exp__2)/(Gf*sqrt__2))) - -################################### -## INFORMATION FOR SMINPUTS -################################### -Block sminputs - 1 1.325070e+02 # aEWM1 - 2 1.166390e-05 # Gf - 3 1.180000e-01 # aS (Note that Parameter not used if you use a PDF set) - -################################### -## INFORMATION FOR YUKAWA -################################### -Block yukawa - 5 4.700000e+00 # ymb - 6 1.730000e+02 # ymt - 15 1.777000e+00 # ymtau - -################################### -## INFORMATION FOR DECAY -################################### -DECAY 6 1.491500e+00 # WT -DECAY 23 2.441404e+00 # WZ -DECAY 24 2.047600e+00 # WW -DECAY 25 6.382339e-03 # WH -## Dependent parameters, given by model restrictions. -## Those values should be edited following the -## analytical expression. MG5 ignores those values -## but they are important for interfacing the output of MG5 -## to external program such as Pythia. 
-DECAY 1 0.000000e+00 # d : 0.0 -DECAY 2 0.000000e+00 # u : 0.0 -DECAY 3 0.000000e+00 # s : 0.0 -DECAY 4 0.000000e+00 # c : 0.0 -DECAY 5 0.000000e+00 # b : 0.0 -DECAY 11 0.000000e+00 # e- : 0.0 -DECAY 12 0.000000e+00 # ve : 0.0 -DECAY 13 0.000000e+00 # mu- : 0.0 -DECAY 14 0.000000e+00 # vm : 0.0 -DECAY 15 0.000000e+00 # ta- : 0.0 -DECAY 16 0.000000e+00 # vt : 0.0 -DECAY 21 0.000000e+00 # g : 0.0 -DECAY 22 0.000000e+00 # a : 0.0 - - -# Number of Events : 2 -# Integrated weight (pb) : 439.19338 - -
- -2212 2212 6.500000e+03 6.500000e+03 0 0 247000 247000 -4 1 -4.391934e+02 3.661122e+00 4.391934e+02 1 -please cite 1405.0301 - - - 8 1 +4.3919338e+02 2.18409400e+02 7.54677100e-03 1.13637100e-01 - 21 -1 0 0 503 502 +0.0000000000e+00 +0.0000000000e+00 +1.5753848612e+02 1.5753848612e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 501 503 -0.0000000000e+00 -0.0000000000e+00 -4.2590181999e+02 4.2590181999e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - 6 2 1 2 501 0 +1.2998184096e+02 -6.4884027876e+00 -2.9537542099e+02 3.6336687781e+02 1.6688759754e+02 0.0000e+00 0.0000e+00 - -6 2 1 2 0 502 -1.2998184096e+02 +6.4884027876e+00 +2.7012087117e+01 2.2007342830e+02 1.7540034961e+02 0.0000e+00 0.0000e+00 - 24 1 3 3 0 0 +5.0317013823e+00 -2.3598693140e+01 -1.3935351491e+02 1.6269245345e+02 8.0419002446e+01 0.0000e+00 -1.0000e+00 - 5 1 3 3 501 0 +1.2495013958e+02 +1.7110290353e+01 -1.5602190608e+02 2.0067442436e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 - -24 1 4 4 0 0 -2.8600069986e+01 +2.4574536812e+01 -3.9691506658e+01 9.7285679922e+01 8.0419002446e+01 0.0000e+00 -1.0000e+00 - -5 1 4 4 0 502 -1.0138177097e+02 -1.8086134024e+01 +6.6703593775e+01 1.2278774838e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 - - 2 0.21669541E+03 -0 - 1 21 0.24236690E-01 0.21840939E+03 - 1 21 0.65523357E-01 0.21840939E+03 - 0.33953413E+04 - - - - 8 1 +4.3919338e+02 3.15887700e+02 7.54677100e-03 1.07761700e-01 - 21 -1 0 0 501 502 +0.0000000000e+00 +0.0000000000e+00 +5.3238340901e+02 5.3238340901e+02 0.0000000000e+00 0.0000e+00 1.0000e+00 - 21 -1 0 0 502 503 -0.0000000000e+00 -0.0000000000e+00 -2.1052403255e+02 2.1052403255e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00 - -6 2 1 2 0 503 +6.6382935030e+01 +2.5548643690e+02 +3.7219413376e+01 3.1807282501e+02 1.7350799341e+02 0.0000e+00 0.0000e+00 - 6 2 1 2 501 0 -6.6382935030e+01 -2.5548643690e+02 +2.8463996308e+02 4.2483461655e+02 1.7258194961e+02 0.0000e+00 0.0000e+00 - 24 1 4 4 0 0 +1.6661005347e-01 -1.8051085807e+02 +2.4998404651e+02 
3.1865880986e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 - 5 1 4 4 501 0 -6.6549545084e+01 -7.4975578835e+01 +3.4655916570e+01 1.0617580669e+02 4.7000000000e+00 0.0000e+00 -1.0000e+00 - -24 1 3 3 0 0 +7.2402758048e+01 +1.6925425433e+02 -3.6729050251e+01 2.0421900271e+02 8.0419002446e+01 0.0000e+00 0.0000e+00 - -5 1 3 3 0 503 -6.0198230171e+00 +8.6232182571e+01 +7.3948463627e+01 1.1385382230e+02 4.7000000000e+00 0.0000e+00 1.0000e+00 - - 2 0.31576070E+03 -0 - 1 21 0.81905139E-01 0.31588770E+03 - 1 21 0.32388313E-01 0.31588770E+03 - 0.11189986E+04 - - -
diff --git a/tools/REX/REX.cc b/tools/REX/REX.cc index 817763619b..dcc5dbbac3 100644 --- a/tools/REX/REX.cc +++ b/tools/REX/REX.cc @@ -111,7 +111,7 @@ namespace REX // ZW: minimal fcn for counting the amount of times // a given search term appears in a string - int nuStrCount( std::string_view searchString, std::string_view searchTerm ) + int strCount( std::string_view searchString, std::string_view searchTerm ) { int count = 0; size_t pos = 0; @@ -124,11 +124,11 @@ namespace REX // ZW: fcn for finding the location of each // entry of seachTerm in the given string textFile - // Pre-allocates vector memory using nuStrCount - std::shared_ptr> nuFindEach( std::string_view textFile, std::string_view searchTerm ) + // Pre-allocates vector memory using strCount + std::shared_ptr> findEach( std::string_view textFile, std::string_view searchTerm ) { auto eachPos = std::make_shared>(); - eachPos->reserve( nuStrCount(textFile, searchTerm) ); + eachPos->reserve( strCount(textFile, searchTerm) ); eachPos->push_back( textFile.find( searchTerm ) ); size_t currPos = textFile.find( searchTerm, eachPos->at(0) + 1 ); while( currPos != npos ) @@ -141,10 +141,10 @@ namespace REX // ZW: fcn for splitting a string into a vector of strings, // each element differentiated by linebreaks in the original string - // Removes sequential linebreaks, as well as leading blankspace - std::shared_ptr> nuLineSplitter( std::string_view currEvt ) + // Removes sequential linebreaks, as well as lines with only whitespace + std::shared_ptr> lineSplitter( std::string_view currEvt ) { - auto lineBreaks = nuFindEach( currEvt, "\n" ); + auto lineBreaks = findEach( currEvt, "\n" ); auto splitLines = std::make_shared>(); if( lineBreaks->at(0) == npos ){ splitLines->push_back( currEvt ); return splitLines; } splitLines->reserve( lineBreaks->size() ); @@ -171,7 +171,7 @@ namespace REX // for the string "\n\n\n\n" std::shared_ptr> lineFinder( std::string_view currEvt, size_t startPos = 0, size_t endPos = npos ) { 
- auto lineBreaks = nuFindEach( currEvt.substr( startPos, endPos - startPos), "\n" ); + auto lineBreaks = findEach( currEvt.substr( startPos, endPos - startPos), "\n" ); auto truBreaks = std::make_shared>(); truBreaks->reserve( lineBreaks->size() ); for( size_t k = 0 ; k < lineBreaks->size() ; ++k ) @@ -181,39 +181,11 @@ namespace REX } return truBreaks; } - - // ZW: fcn for splitting a string into a vector of strings, - // each element separated by blankspace (" ") in the original string - // Ignores sequential blankspaces, as well as linebreaks - // ie "hello \n\n\n world" would return {"hello", "world"} - // Does not ignore linebreaks that are not separated from words - // by anything other than blankspace, - // ie "hello \n\n\nworld \n\n" would return {"hello", "\n\nworld"} - std::shared_ptr> nuWordSplitter( std::string_view currEvt ) - { - std::vector noSpace; - size_t nuStart = currEvt.find_first_not_of( " " ); - size_t nuEnd = currEvt.find(" ", nuStart+1 ); - auto splitWords = std::make_shared>(); - splitWords->reserve(13); - while( nuStart != npos ) - { - std::string_view word = currEvt.substr( nuStart, nuEnd - nuStart ); - if( word == "" || word == "\n" || word == " " ){ - nuStart = currEvt.find_first_not_of(" ", nuEnd); - nuEnd = currEvt.find( " ", nuStart + 1); - continue; } - splitWords->push_back( currEvt.substr( nuStart, nuEnd - nuStart ) ); - nuStart = currEvt.find_first_not_of(" ", nuEnd); - nuEnd = currEvt.find( " ", nuStart + 1); - } - return splitWords; - } // ZW: fcn for splitting a string into a vector of strings, // elements separated by any form of blankspace in the original string // Ignores sequential blankspaces of all forms - std::shared_ptr> nuBlankSplitter( std::string_view currEvt ) + std::shared_ptr> blankSplitter( std::string_view currEvt ) { auto strtPos = currEvt.find_first_not_of(" \n\r\f\t\v"); auto splitString = std::make_shared>(); @@ -221,6 +193,7 @@ namespace REX auto endPos = currEvt.find_first_of(" \n\r\f\t\v", strtPos); 
while( strtPos != npos ) { + if( endPos == npos ){ splitString->push_back( currEvt.substr( strtPos ) ); break; } splitString->push_back( currEvt.substr( strtPos, endPos - strtPos ) ); strtPos = currEvt.find_first_not_of(" \n\r\f\t\v", endPos); endPos = currEvt.find_first_of(" \n\r\f\t\v", strtPos); @@ -1116,7 +1089,7 @@ namespace REX // ZW: fcn for finding each decay line in SLHA format // parameter card std::vector decBlockStractor( std::string_view parseFile ){ - auto allDs = nuFindEach( parseFile, "\nd" ); + auto allDs = findEach( parseFile, "\nd" ); std::vector decLines; decLines.reserve( allDs->size() ); for( auto pos : *allDs ) @@ -1136,10 +1109,10 @@ namespace REX auto blockStrt = blockFinder( parseFile, startPt ); auto newBlock = blockFinder( parseFile, blockStrt + 1 ); std::vector paramLines; - paramLines.reserve( nuStrCount( parseFile, "\n" ) ); + paramLines.reserve( strCount( parseFile, "\n" ) ); std::shared_ptr> parLines; - if( newBlock == npos ){ parLines = nuLineSplitter( parseFile.substr( blockStrt ) ); } - else{ parLines = nuLineSplitter( parseFile.substr( blockStrt, newBlock - blockStrt ) ); } + if( newBlock == npos ){ parLines = lineSplitter( parseFile.substr( blockStrt ) ); } + else{ parLines = lineSplitter( parseFile.substr( blockStrt, newBlock - blockStrt ) ); } for( auto line : *parLines ) { if( line.size() == 0 ){ continue; } @@ -1179,7 +1152,7 @@ namespace REX beginLine = originFile.find_first_not_of("\n \r\f\t\v", beginLine); if( endLine == npos ){ endLine = originFile.find("\n", beginLine ) + 1; } sourceFile = originFile.substr( beginLine, endLine - beginLine ); - auto evLine = nuWordSplitter( sourceFile ); + auto evLine = blankSplitter( sourceFile ); nprt = evLine->at(0) ; procid = evLine->at(1); weight = evLine->at(2); @@ -1241,7 +1214,7 @@ namespace REX lhePrt::lhePrt( const std::string_view originFile, const size_t& beginLine, const size_t& endLine ) { sourceFile = originFile.substr( beginLine, endLine - beginLine ); - auto evLine = 
nuWordSplitter( sourceFile ); + auto evLine = blankSplitter( sourceFile ); pdg = evLine->at(0); status = evLine->at(1); mothers[0] = evLine->at(2); mothers[1] = evLine->at(3); @@ -1657,15 +1630,6 @@ namespace REX return this->comp(*ev, relStats); } - event& makeEv( std::vector>& particles ){ - static auto returnEvent = event( particles ); - return returnEvent; - } - - std::vector> getParticles( event& ev ){ - return ev.getPrts(); - } - // ZW: struct for handling the first line of // LHE format init tag bool lheInitHead::isWritten(){ return written; } @@ -1674,7 +1638,7 @@ namespace REX if( isModded() || !isWritten() ){ writer(); } return content; } lheInitHead::lheInitHead( std::string_view initHead ){ - auto vals = *nuBlankSplitter( initHead ); + auto vals = *blankSplitter( initHead ); if( vals.size() < 10 ){ return; } idbmup[0] = vals[0]; idbmup[1] = vals[1]; ebmup[0] = vals[2]; ebmup[1] = vals[3]; @@ -1687,7 +1651,7 @@ namespace REX if( initNode.getName() != "init" ){ return; } auto startPos = initNode.getFile().find( ">", initNode.getStart() ) + 1; auto endPos = initNode.getFile().find( "\n", startPos ); - auto vals = *nuBlankSplitter( initNode.getFile().substr( startPos, endPos - startPos ) ); + auto vals = *blankSplitter( initNode.getFile().substr( startPos, endPos - startPos ) ); idbmup[0] = vals[0]; idbmup[1] = vals[1]; ebmup[0] = vals[2]; ebmup[1] = vals[3]; pdfgup[0] = vals[4]; pdfgup[1] = vals[5]; @@ -1711,7 +1675,7 @@ namespace REX lheInitLine::lheInitLine(){} lheInitLine::lheInitLine( std::string_view procLine ) { - auto vals = *nuBlankSplitter( procLine ); + auto vals = *blankSplitter( procLine ); if( vals.size() < 4 ){ return; } xsecup = vals[0]; xerrup = vals[1]; @@ -1743,7 +1707,7 @@ namespace REX } } realLine = paramLine; - auto vals = *nuBlankSplitter( realLine ); + auto vals = *blankSplitter( realLine ); idStr = vals[0]; valStr = vals[1]; if( parseOnline ){ @@ -1774,7 +1738,7 @@ namespace REX // ZW: struct for handling single DECAY line // in 
SLHA format parameter card void decVal::parse() { - auto vals = *nuBlankSplitter( realLine ); + auto vals = *blankSplitter( realLine ); id = std::stoi( std::string(vals[1]) ); value = std::stod( std::string(vals[2]) ); if( vals.size() > 3 ) @@ -2009,7 +1973,7 @@ namespace REX } bool initNode::parseContent(){ if( content.size() == 0 ){ return false; } - auto lines = nuLineSplitter( content ); + auto lines = lineSplitter( content ); if( lines->size() == 0 ){ return false; } initHead = std::make_shared(lines->at(0) ); for( size_t k = 1 ; k < lines->size() ; ++k ){ @@ -2330,126 +2294,6 @@ namespace REX return writtenSelf; } - // ZW: function for extracting event information from - // LHE files - std::vector>> valExtraction( lheNode& lheFile ) - { - bool getGs = true; - auto momVec = std::make_shared>(); - auto wgtVec = std::make_shared>(); - auto gVec = std::make_shared>(); - auto events = lheFile.getEvents(); - momVec->reserve( events.size() * 4 * std::stoi(std::string(events[0]->getHead().getNprt())) ); - wgtVec->reserve( events.size() ); - gVec->reserve( events.size() ); - if( getGs ){ - for( auto event : events ) - { - wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); - gVec->push_back( std::sqrt( 4.0 * M_PI * std::stod(std::string( event->getHead().getAQCD() )))); - for( auto prt : event->getPrts() ) - { - momVec->push_back(std::stod(std::string(prt->getE()))); - for( int p = 0 ; p < 3 ; ++p ) - { momVec->push_back(std::stod(std::string(prt->getMom()[p]))); } - } - } - } else{ - for( auto event : events ) - { - wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); - gVec->push_back( std::stod(std::string( event->getHead().getAQCD() ))); - for( auto prt : event->getPrts() ) - { - momVec->push_back(std::stod(std::string(prt->getE()))); - for( int p = 0 ; p < 3 ; ++p ) - { momVec->push_back(std::stod(std::string(prt->getMom()[p]))); } - } - - } } - return {momVec, gVec, wgtVec}; - } - - // ZW: fcn for parsing an LHE 
format event block - // and return a REX format event object - std::shared_ptr evPtrParsor( std::string_view parseFile, size_t& initPos, size_t& endPos ) - { - auto currNode = std::make_shared(parseFile, initPos); - initPos = nodeStartFind( parseFile, initPos + 1 ); - while( initPos < endPos ) - { - currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); - } - size_t equalSign = parseFile.find_first_of("=>", initPos); - size_t nodeInitEnd = parseFile.find(">", initPos); - while( equalSign < nodeInitEnd ){ - currNode->addTag( xmlTagParser(parseFile, equalSign) ); - } - initPos = nodeStartFind( parseFile, endPos ); - endPos = nodeEndFind( parseFile, endPos + 1 ); - return currNode; - } - - // ZW: fcn for parsing an LHE format header - // and return a REX format lheHead object - std::shared_ptr lheHeadParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) - { - auto currNode = std::make_shared(parseFile, initPos); - initPos = nodeStartFind( parseFile, initPos + 1 ); - while( initPos < endPos ) - { - currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); - if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "init" ){ continue; } - if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "slha" ){ - currNode->setParameters( std::make_shared(currNode->getChildren()[ currNode->getChildren().size() - 1 ]) ); - } - if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "initrwgt" ){ - currNode->setInitRwgt( std::make_shared( currNode->getChildren()[ currNode->getChildren().size() - 1 ] ) ); - } - } - size_t equalSign = parseFile.find("=", initPos); - size_t nodeInitEnd = parseFile.find(">", initPos); - while( equalSign < nodeInitEnd ){ - currNode->addTag( xmlTagParser(parseFile, equalSign) ); - } - initPos = nodeStartFind( parseFile, endPos ); - endPos = nodeEndFind( parseFile, endPos + 1 ); - return currNode; - } - - // ZW: fcn for parsing an LHE format file - // and 
return a REX format LHE node object - std::shared_ptr lheParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) - { - auto currNode = std::make_shared(parseFile, initPos); - initPos = nodeStartFind( parseFile, initPos + 1 ); - while( initPos < endPos ) - { - if( parseFile.substr( initPos, 6 ) == "getEvents().push_back( evPtrParsor( parseFile, initPos, endPos ) ); - continue; - } else if( parseFile.substr( initPos, 7 ) == "setHeader(lheHeadParser( parseFile, initPos, endPos )); - continue; - } else if( parseFile.substr( initPos, 5 ) == "setInit( std::make_shared( parseFile, initPos ) ); - initPos = nodeStartFind( parseFile, endPos ); - endPos = nodeEndFind( parseFile, nodeEndFind( parseFile, endPos + 1 ) + 1); - continue; - } else { - currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); - } - } - size_t equalSign = parseFile.find("=", initPos); - size_t nodeInitEnd = parseFile.find(">", initPos); - while( equalSign < nodeInitEnd ){ - currNode->addTag( xmlTagParser(parseFile, equalSign) ); - } - initPos = nodeStartFind( parseFile, endPos ); - endPos = nodeEndFind( parseFile, endPos + 1 ); - return currNode; - } - // ZW: struct for treating individual HEP // processes, formatted based on PDG codes // and the LHE particle status standard diff --git a/tools/REX/REX.h b/tools/REX/REX.h index 4f4adcf5b4..cf74424cb5 100644 --- a/tools/REX/REX.h +++ b/tools/REX/REX.h @@ -59,10 +59,9 @@ namespace REX std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort); extern template std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort); - std::shared_ptr> nuFindEach( std::string_view textFile, std::string_view searchTerm ); - std::shared_ptr> nuLineSplitter( std::string_view currEvt ); - std::shared_ptr> nuWordSplitter( std::string_view line ); - std::shared_ptr> nuBlankSplitter( std::string_view currEvt ); + std::shared_ptr> findEach( std::string_view textFile, std::string_view searchTerm ); + 
std::shared_ptr> lineSplitter( std::string_view currEvt ); + std::shared_ptr> blankSplitter( std::string_view currEvt ); std::shared_ptr filePuller( const std::string& fileLoc ); bool filePusher( std::string fileLoc, std::string fileCont ); @@ -460,6 +459,9 @@ namespace REX }; bool clStringComp( std::string_view str1, std::string str2 ); + bool clStringComp( std::string_view str1, std::string_view str2 ); + bool clStringComp( std::string str1, std::string str2 ); + bool clStringComp( std::string str1, std::string_view str2 ); struct lesHouchesCard { public: diff --git a/tools/REX/REX.hpp b/tools/REX/REX.hpp deleted file mode 100644 index 0d62e1d8a7..0000000000 --- a/tools/REX/REX.hpp +++ /dev/null @@ -1,3693 +0,0 @@ -/*** - * ______ _______ __ - * | ___ \ ___\ \ / / - * | |_/ / |__ \ V / - * | /| __| / \ - * | |\ \| |___/ /^\ \ - * \_| \_\____/\/ \/ - * - ***/ - -// THIS IS NOT A LICENSED RELEASE -// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD -// FROM AN IMPROPER RELEASE. - -// Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. -// All rights reserved. 
- -#ifndef _REX_CC_ -#define _REX_CC_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "REX.h" -#include - -// ZW: all fcns within the REX standard sit in the -// namespace REX -// Note that as a convention, std::string_view objects will be -// referred to as strings unless the difference is relevant -namespace REX -{ - - using sortFcn = std::function>(std::vector)>; - using statSort = std::function>(std::string_view, std::vector)>; - - // ZW: index sorting function, which returs vector - // of the indices of the original vector sorted - // by default in ascending order - // ie, for [5.0, 0.25, 2.0, 9.2] returns [1, 2, 0, 3] - template - std::shared_ptr> indSort(const std::vector &vector, std::function comp = std::less()) - { - auto sorted = std::make_shared>(vector.size()); - std::iota(sorted->begin(), sorted->end(), 0); - std::stable_sort(sorted->begin(), sorted->end(), [&](size_t i, size_t j) { return comp(vector[i], vector[j]); }); - return sorted; - } - - // ZW: wrapper for indSort for comparing string-type arguments representing integers - template - std::shared_ptr> stoiSort(const std::vector &vector) - { - std::function stoicomp = [](const T& i, const T& j) { - return std::stoi(std::string(i)) < std::stoi(std::string(j)); }; - return indSort(vector, stoicomp); - } - template std::shared_ptr> stoiSort(const std::vector &vector); - - // ZW: wrapper for indSort for comparing string-type arguments representing doubles - template - std::shared_ptr> stodSort(const std::vector &vector) - { - std::function stodcomp = [](const T& i, const T& j) { return std::stod(std::string(i)) < std::stod(std::string(j)); }; - return indSort(vector, stodcomp); - } - - // ZW: templated fcn for finding the order of elements in a vector to_sort - // based on their order in a reference vector reference - // Elements not found in reference are represented 
by npos, - // including if to_sort is longer than reference - template - std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort) { - std::unordered_map> indexMap; - - // Populate indexMap with indices from vec1 - for (size_t i = 0; i < reference.size(); ++i) { - indexMap[reference[i]].push(i); - } - - auto order = std::make_shared>(std::vector(to_sort.size(), npos)); - //order->reserve(to_sort.size()); // Pre-allocate memory - size_t pos = 0; - for (const auto& elem : to_sort) { - auto it = indexMap.find(elem); - if (it != indexMap.end() && !it->second.empty()) { - order->at(pos) = (it->second.front()); - it->second.pop(); - } //else { - // Element in vec2 not found in vec1 - // order->at(pos) = npos; - //} - ++pos; - } - - return order; - } - template std::shared_ptr> getRefOrder(const std::vector& reference, const std::vector& to_sort); - - // ZW: minimal fcn for counting the amount of times - // a given search term appears in a string - int nuStrCount( std::string_view searchString, std::string_view searchTerm ) - { - int count = 0; - size_t pos = 0; - while((pos = searchString.find(searchTerm, pos)) != npos ){ - ++count; - ++pos; - } - return count; - } - - // ZW: fcn for finding the location of each - // entry of seachTerm in the given string textFile - // Pre-allocates vector memory using nuStrCount - std::shared_ptr> nuFindEach( std::string_view textFile, std::string_view searchTerm ) - { - auto eachPos = std::make_shared>(); - eachPos->reserve( nuStrCount(textFile, searchTerm) ); - eachPos->push_back( textFile.find( searchTerm ) ); - size_t currPos = textFile.find( searchTerm, eachPos->at(0) + 1 ); - while( currPos != npos ) - { - eachPos->push_back( currPos ); - currPos = textFile.find( searchTerm, currPos + 1 ); - } - return eachPos; - } - - // ZW: fcn for splitting a string into a vector of strings, - // each element differentiated by linebreaks in the original string - // Removes sequential linebreaks, ie "\n\n\n" would - 
// only result in a single element separation - std::shared_ptr> nuLineSplitter( std::string_view currEvt ) - { - auto lineBreaks = nuFindEach( currEvt, "\n" ); - std::vector trueBreaks; - trueBreaks.reserve( lineBreaks->size() ); - for( size_t k = 0 ; k < lineBreaks->size() - 1 ; ++k ) - { - if( int( (*lineBreaks)[k+1] - (*lineBreaks)[k]) == 1){continue;} - trueBreaks.push_back( (*lineBreaks)[k] ); - } - auto splitLines = std::make_shared>(); - splitLines->reserve( trueBreaks.size() ); - size_t startPos = 0; - for( auto k : trueBreaks ) - { - splitLines->push_back( currEvt.substr( startPos + 1, k - startPos - 1) ); - startPos = k; - } - if( currEvt.substr( startPos ).size() > 1 ){ splitLines->push_back( currEvt.substr( startPos ) ); } - return splitLines; - } - - // ZW: fcn for finding each linebreak in a string, - // returning a vector of the positions of "\n" characters - // Ignores sequential linebreaks, ie would only return { } - // for the string "\n\n\n\n" - std::shared_ptr> lineFinder( std::string_view currEvt, size_t startPos = 0, size_t endPos = npos ) - { - auto lineBreaks = nuFindEach( currEvt.substr( startPos, endPos - startPos), "\n" ); - auto truBreaks = std::make_shared>(); - truBreaks->reserve( lineBreaks->size() ); - for( size_t k = 0 ; k < lineBreaks->size() ; ++k ) - { - if( int( (*lineBreaks)[k+1] - (*lineBreaks)[k]) == 1){continue;} - truBreaks->push_back( (*lineBreaks)[k] ); - } - return truBreaks; - } - - // ZW: fcn for splitting a string into a vector of strings, - // each element separated by blankspace (" ") in the original string - // Ignores sequential blankspaces, as well as linebreaks - // ie "hello \n\n\n world" would return {"hello", "world"} - // Does not ignore linebreaks that are not separated from words - // by anything other than blankspace, - // ie "hello \n\n\nworld \n\n" would return {"hello", "\n\nworld"} - std::shared_ptr> nuWordSplitter( std::string_view currEvt ) - { - std::vector noSpace; - size_t nuStart = 
currEvt.find_first_not_of( " " ); - size_t nuEnd = currEvt.find(" ", nuStart+1 ); - auto splitWords = std::make_shared>(); - splitWords->reserve(13); - while( nuStart != npos ) - { - std::string_view word = currEvt.substr( nuStart, nuEnd - nuStart ); - if( word == "" || word == "\n" || word == " " ){ - nuStart = currEvt.find_first_not_of(" ", nuEnd); - nuEnd = currEvt.find( " ", nuStart + 1); - continue; } - splitWords->push_back( currEvt.substr( nuStart, nuEnd - nuStart ) ); - nuStart = currEvt.find_first_not_of(" ", nuEnd); - nuEnd = currEvt.find( " ", nuStart + 1); - } - return splitWords; - } - - // ZW: fcn for splitting a string into a vector of strings, - // elements separated by any form of blankspace in the original string - // Ignores sequential blankspaces of all forms - std::shared_ptr> nuBlankSplitter( std::string_view currEvt ) - { - auto lines = nuLineSplitter( currEvt ); - auto splitString = std::make_shared>(); - splitString->reserve( lines->size() * lines->at(0).size() ); - for( auto line : *lines ) - { - auto words = nuWordSplitter(line); - for( auto word : *words ) - { - if( word == "" || word == "\n" || word == " " ){continue;} - splitString->push_back( word ); - } - } - return splitString; - } - - // ZW: templated fcn for comparing two - // string-like objects, ignoring cases - bool clStringComp( std::string_view org, std::string comp ){ - return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), - []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); - } - bool clStringComp( std::string_view org, std::string_view comp ){ - return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), - []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); - } - bool clStringComp( std::string org, std::string_view comp ){ - return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), - []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); - } - bool 
clStringComp( std::string org, std::string comp ){ - return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), - []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); - } - // template - // bool clStringComp( const Str1& org, const Str2& comp ){ - // return std::equal( org.begin(), org.end(), comp.begin(), comp.end(), - // []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); - // } - // template - // bool clStringComp( const Str1Pt& orgStrt, const Str1Pt& orgEnd, const Str2& comp ){ - // return std::equal( orgStrt, orgEnd, comp.begin(), comp.end(), - // []( const char& x, char y ){ return (std::toupper(x) == std::toupper(y)); } ); - // } - - // ZW: templated fcn for finding a caseless substring searchTerm in srcFile - // On failure to find searchTerm, returns REX::npos - template - size_t clStringFind( const Str1& srcFile, const Str2& searchTerm, size_t strtPt = 0 ){ - size_t strLen = searchTerm.size(); - if( srcFile.size() == 0 || srcFile.size() < strLen ){ return npos; } - for( size_t k = strtPt ; k < srcFile.size() - strLen; ++k ) - { - if( clStringComp( srcFile.substr(k, strLen), searchTerm ) ){ return k; } - } - return npos; - } - - // ZW: templated fcn for finding a caseless substring searchTerm of srcFile - // fulfilling a particular predicate cond( size_t, string ) - template - size_t clStringFindIf( const Str1& srcFile, const Str2& searchTerm, std::function& cond, size_t strtPt = 0 ) - { - auto currPt = clStringFind( srcFile, searchTerm, strtPt ); - bool condStat = cond( currPt, srcFile ); - while( !( condStat ) && currPt != npos) - { - currPt = clStringFind( srcFile, searchTerm, currPt + 1 ); - condStat = cond( currPt, srcFile ); - } - return currPt; - } - - // ZW: templated fcn for counting the number of occurances of - // caseless substring searchTerm in string-like object srcFile - template - int clStrCount( Str1 srcFile, Str2 searchTerm ) - { - int count = 0; - size_t pos = 0; - while((pos 
= clStringFind( srcFile, searchTerm, pos ) ) != npos ){ - ++count; - ++pos; - } - return count; - } - - // ZW: templated fcn for finding each instance of - // of substring searchTerm of string-like object srcFile - template - std::shared_ptr> clFindEach( Str1 srcFile, Str2 searchTerm ) - { - auto eachPos = std::make_shared>(); - auto nos = clStrCount(srcFile, searchTerm); - if( nos == 0 ){ return eachPos; } - eachPos->reserve( nos ); - eachPos->push_back( clStringFind( srcFile, searchTerm ) ); - size_t currPos = clStringFind( srcFile, searchTerm, eachPos->at(0) + 1); - while( currPos != npos ) - { - eachPos->push_back( currPos ); - currPos = clStringFind( srcFile, searchTerm, currPos + 1 ); - } - return eachPos; - } - - // ZW: fcn for finding left angle bracket - // indicating the start of a new node in an XML file - size_t nodeStartFind( std::string_view parseFile, size_t strtPos ) - { - auto retPtr = parseFile.find("<", strtPos); - while( parseFile[retPtr + 1] == '!' || parseFile[retPtr +1] == '/' || parseFile[retPtr +1] == '?' 
){ - retPtr = parseFile.find("<", retPtr +1); - } - return retPtr; - } - - size_t endNodeStartFind( std::string_view parseFile, size_t strtPos ) - { - return parseFile.find(">", nodeStartFind( parseFile, strtPos )); - } - - std::pair startNodePts( std::string_view parseFile, size_t strtPos ) - { - return { nodeStartFind( parseFile, strtPos ), endNodeStartFind( parseFile, strtPos ) }; - } - - // ZW: fcn for finding left angle bracket - // indicating an end of a node in an XML file - size_t nodeEndFind( std::string_view parseFile, size_t strtPos ) - { - auto retPtr = parseFile.find("<", strtPos); - while( parseFile[retPtr + 1] != '/' ){ - retPtr = parseFile.find("<", retPtr +1); - } - return retPtr; - } - - size_t endNodeEndFind( std::string_view parseFile, size_t strtPos ) - { - return parseFile.find(">", nodeEndFind( parseFile, strtPos )); - } - - std::pair endNodePts( std::string_view parseFile, size_t strtPos ) - { - return { nodeEndFind( parseFile, strtPos ), endNodeEndFind( parseFile, strtPos ) }; - } - - // ZW: struct for handling tags in XML node opening tags - void xmlTag::setVal( std::string_view valSet ){ modded = true; val = valSet; } - void xmlTag::setId( std::string_view idSet ){ modded = true; id = idSet; } - std::string_view xmlTag::getVal(){ return val; } - std::string_view xmlTag::getId(){ return id; } - bool xmlTag::isModded(){ return modded; } - xmlTag::xmlTag(){ modded = false; return; } - xmlTag::xmlTag( xmlTag& oldTag ){ - modded = false; val = oldTag.getVal(); id = oldTag.getId(); - } - xmlTag::xmlTag( std::string_view initId, std::string_view initVal){ - modded = false; val = initVal; id = initId; - } - - // ZW: function for parsing XML opening - // tags and returning the next header tag - std::shared_ptr xmlTagParser( std::string_view tagLine, size_t& equPt ) - { - auto tagBreaker = tagLine.find_first_not_of(" ", equPt+1); // ZW: need to determine what type of quotation marks are used - auto tagEnder = tagLine.find( tagLine[tagBreaker], 
tagBreaker+1); - auto attrEnd = tagLine.find_last_not_of(" ", equPt - 1) ; - auto attrStart = tagLine.find_last_of(" ", attrEnd) + 1; - auto tagPtr = std::make_shared(tagLine.substr(attrStart, attrEnd - attrStart + 1), tagLine.substr(tagBreaker + 1, tagEnder - tagBreaker - 1)); - equPt = tagLine.find("=", equPt + 1); // ZW: modifies input equPt to point to the next equality sign in tagLine - return tagPtr; - } - - // ZW: struct for handling the tree structure of XML files, - // essentially just giving the positions of the beginning and - // end of each node s.t. the proper node structures can accurately - // detail where children begin and end while allowing for personal - // content between child nodes - xmlTree::xmlTree(){ return; } - xmlTree::xmlTree( std::string_view file ){ - origin = file; - children = std::make_shared>>(); - start = file.find_first_not_of(" \n\r\f\t\v"); - if( file.compare(start, 1, "<") != 0 ) { - faux = true; - contSt = start; - end = std::min( nodeStartFind(file, start), nodeEndFind(file, start) ); - contEnd = end; - initialised = true; - return; - } - if( file.compare(start + 1, 1, "!") == 0 || file.compare(start + 1, 1, "?") == 0 ) { - faux = true; - contSt = start; - contEnd = file.find(">", start + 1); - end = std::min( nodeStartFind(file, contEnd), nodeEndFind(file, contEnd) ); - initialised = true; - return; - } - auto stEnd = file.find(">", start); - if( file.compare(stEnd - 1, 1, "/" ) == 0 ) { - end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); - contSt = npos; - contEnd = npos; - initialised = true; - return; - } - contSt = stEnd + 1; - auto stPos = nodeStartFind(file, start + 1); - stEnd = nodeEndFind(file, start + 1); - contEnd = std::min(stPos, stEnd); - while( stPos < stEnd ) - { - children->push_back( std::make_shared( file, stPos, stEnd ) ); - } - stEnd = endNodeEndFind(file, stEnd); - end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); - initialised = true; - } - xmlTree::xmlTree( std::string_view file, size_t& 
strt, size_t& nd ){ - origin = file; - children = std::make_shared>>(); - start = file.find_first_not_of(" \n\r\f\t\v", strt); - if( file.compare(start, 1, "<") != 0) { - faux = true; - contSt = start; - strt = nodeStartFind(file, start); - nd = nodeEndFind(file, start); - end = std::min( strt, nd ); - contEnd = end; - initialised = true; - return; - } - if( file.compare(start + 1, 1, "!") == 0 ) { - faux = true; - contSt = start; - contEnd = file.find(">", start + 1); - strt = nodeStartFind(file, contEnd); - nd = nodeEndFind(file, contEnd); - end = std::min( strt, nd ); - initialised = true; - return; - } - auto stEnd = file.find(">", start); - if( file.compare(stEnd - 1, 1, "/" ) == 0 ) { - end = file.find_first_not_of(" \n\r\f\t\v", stEnd + 1); - contSt = npos; - contEnd = npos; - strt = nodeStartFind(file, start); - nd = nodeEndFind(file, start); - initialised = true; - return; - } - contSt = stEnd + 1; - strt = nodeStartFind(file, start + 1); - nd = nodeEndFind(file, start + 1); - contEnd = std::min(strt, nd); - while( strt < nd ) - { - children->push_back( std::make_shared( file, strt, nd ) ); - } - end = file.find_first_not_of(" \n\r\f\t\v", endNodeEndFind(file, nd) + 1); - initialised = true; - strt = end; - nd = nodeEndFind(file, strt); - } - - // ZW: struct for handling nodes in generic XML files - xmlNode::xmlNode(){ modded = false; return; } - xmlNode::xmlNode( const std::string_view originFile, const size_t& begin, const std::vector>& childs ){ - modded = false; - xmlFile = originFile.substr( begin ); - structure = xmlTree( originFile ); - faux = structure.isFaux(); - start = structure.getStart(); - end = structure.getEnd(); - size_t trueStart = xmlFile.find_first_not_of("< \n\r\f\t\v", start+1); - name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ \n\r\f\t\v", trueStart) - trueStart ); - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - for( auto child : childs ){ - children.push_back( 
child ); - } - } - xmlNode::xmlNode( xmlTree &tree ){ - modded = false; - structure = tree; - if( !structure.isInit() ){ return; } - xmlFile = structure.getOrigin(); - faux = structure.isFaux(); - start = structure.getStart(); - end = structure.getEnd(); - size_t trueStart = xmlFile.find_first_not_of("< \n\r\f\t\v", start); - name = xmlFile.substr( trueStart, xmlFile.find_first_of(">/ \n\r\f\t\v", trueStart) - trueStart ); - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - for( auto& child : *(structure.getChildren()) ){ - children.push_back( std::make_shared( *child ) ); - } - } - std::vector> xmlNode::getChildren(){ return children; } - std::vector> xmlNode::getTags(){ return tags; } - std::string_view xmlNode::getFile(){ return xmlFile; } - std::string_view xmlNode::getName(){ return name; } - std::string_view xmlNode::getContent(){ return content; } - size_t xmlNode::getStart(){ return start; } - size_t xmlNode::getEnd(){ return end; } - xmlTree xmlNode::getTree(){ return structure; } - bool xmlNode::isModded(){ return modded; } - bool xmlNode::isModded( bool deep ){ - bool modStat = isModded(); - if( !deep ){ return modStat; } - for( auto child : children ){ modStat = (modStat || child->isModded( deep )); } - return modStat; - } - bool xmlNode::isWritten(){ return written; } - bool xmlNode::isParsed(){ return parsed; } - bool xmlNode::isFaux(){ return faux; } - bool xmlNode::hasChildren(){ return children.size() > 0; } - void xmlNode::setModded( bool mod ){ modded = mod; } - bool xmlNode::deepModded(){ return deepMod; } - bool xmlNode::deepParse(){ return deepParsed; } - void xmlNode::parser( bool recursive ){ - parsed = parse( recursive ); - } - void xmlNode::addChild( std::shared_ptr child ){ modded = true; children.push_back(child); } - void xmlNode::addTag( std::shared_ptr tag ){ modded = true; tags.push_back(tag); } - void xmlNode::setFile( std::string_view file ){ modded = true; xmlFile = file; } 
- void xmlNode::setName( std::string_view newName ){ modded = true; name = newName; } - void xmlNode::setCont( std::string_view cont ){ modded = true; content = cont; } - - bool xmlNode::parse(){ - auto topStat = parseTop(); - auto contStat = parseContent(); - return ( topStat && contStat ); - } - bool xmlNode::parse( bool recurs ) - { - bool parseSt = parse(); - if( !recurs ){ return parseSt; } - bool childSt = parseChildren( recurs ); - deepMod = true; - return (parseSt && childSt ); - } - bool xmlNode::parseTop(){ - if( xmlFile == "" ){ return false; } - if( isFaux() ){ return true; } - size_t eqSgn = xmlFile.find( "=", start ); size_t nodeInitEnd = xmlFile.find( ">", start ); - while( eqSgn < nodeInitEnd ){ tags.push_back( xmlTagParser( xmlFile, eqSgn ) ); } - return true; - } - bool xmlNode::parseContent(){ - if( xmlFile == "" ){ return false; } - end = structure.getContEnd(); - for( auto branch : *(structure.getChildren()) ){ - children.push_back( std::make_shared( *branch ) ); - } - return true; - } - bool xmlNode::parseChildren( bool recursive ){ - bool status = true; - if( recursive ){ - for( auto child : children ) - { - status = (status && child->parse( true )); - deepParsed = true; - } - } else { - for( auto child : children ) - { - status = (status && child->parse()); - deepParsed = true; - } - } - return status; - } - void xmlNode::headWriter() { - if( isFaux() ){ return; } - nodeHeader = "<" + std::string(name) ; - for( auto tag : tags ){ - nodeHeader += " " + std::string(tag->getId()) + "=\"" + std::string(tag->getVal()) + "\""; - } - nodeHeader += ">"; - } - void xmlNode::endWriter() { - if( isFaux() ){ return; } - auto endSt = xmlFile.find_last_of("<", end); - nodeEnd = xmlFile.substr( endSt, end - endSt ); - } - void xmlNode::contWriter() { - if( hasChildren() ){ - nodeContent = std::string(content.substr(0, children[0]->start - 1 )); - } else { - nodeContent = std::string(content); - } - } - void xmlNode::childWriter() { - for(auto child : 
children){ - nodeContent += (*child->nodeWriter()); - } - } - void xmlNode::endFinder(){ - auto headEnd = xmlFile.find(">", start); - auto slashPos = xmlFile.find("/", start); - if( headEnd > slashPos ){ end = headEnd; } - else{ end = xmlFile.find( ">", xmlFile.find( "( nodeHeader + nodeContent + nodeEnd ); - written = true; - modded = false; - } else if( !isWritten() ){ - writtenSelf = std::make_shared( xmlFile.substr( start, end - start ) ); - written = true; - } - } - - void xmlNode::childCounter( int& noChilds ) - { - for( auto child : children ) - { - child->childCounter( noChilds ); - if( child->end == 0 || child->isFaux() ){ --noChilds; } - } - noChilds += children.size(); - } - int xmlNode::childCounter() { - int noChilds = 0; - childCounter( noChilds ); - return noChilds; - } - std::shared_ptr xmlNode::nodeWriter() { - if( isModded( true ) || !isWritten() ){ fullWriter(); } - return writtenSelf; - } - - - // ZW: function for large scale parsing of XML files - // sequentially goes through the document and - // recursively calls itself while the next node - // beginning is closer than the next node ending - std::shared_ptr xmlPtrParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) - { - auto currNode = std::make_shared(parseFile, initPos); - size_t equalSign = parseFile.find("=", initPos); - size_t nodeInitEnd = parseFile.find(">", initPos); - initPos = nodeStartFind( parseFile, initPos + 1 ); - while( equalSign < nodeInitEnd ){ - currNode->addTag( xmlTagParser(parseFile, equalSign) ); - } - while( initPos < endPos ) - { - currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); - } - - initPos = nodeStartFind( parseFile, endPos ); - endPos = nodeEndFind( parseFile, endPos + 1 ); - return currNode; - } - - // ZW: struct for handling rwgt parameter sets - // in the LHE header initrwgt node - int headWeight::headWeight::getId(){ return id; } - std::string_view headWeight::getTag(){ return idTag; } - bool headWeight::hasTag(){ return 
(idTag.size() > 0); } - headWeight::headWeight(){ name = "weight"; return; } - headWeight::headWeight( std::string_view paramSet, const size_t& begin ) : xmlNode(){ name = "weight"; xmlFile = paramSet; content = paramSet; return; } - headWeight::headWeight( std::string_view paramSet, std::string_view idText, int idNo, const size_t& begin ) : xmlNode(){ - name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; - } - headWeight::headWeight( xmlNode& node ) : xmlNode( node ){ - parser( false ); - name = "weight"; - for (auto tag : tags ){ - if( tag->getId() == "id" ){ - idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); - id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); - } - } - } - headWeight::headWeight( xmlNode* node ) : xmlNode( *node ){ - parser( false ); - name = "weight"; - for (auto tag : tags ){ - if( tag->getId() == "id" ){ - idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); - id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); - } - } - } - headWeight::headWeight( std::shared_ptr node ) : xmlNode( *node ){ - parser( false ); - name = "weight"; - for (auto tag : tags ){ - if( tag->getId() == "id" ){ - idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); - id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); - } - } - } - headWeight::headWeight( xmlTree& tree ) : xmlNode( tree ){ - parser( false ); - name = "weight"; - for (auto tag : tags ){ - if( tag->getId() == "id" ){ - idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); - id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); - } - } - } - headWeight::headWeight( xmlTree* tree ) : xmlNode( *tree ){ - parser( false ); - name = "weight"; - for (auto tag : tags ){ - if( tag->getId() == "id" ){ - idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); - id = std::stoi( std::string( 
tag->getVal().substr( idTag.size() + 1 ) ) ); - } - } - } - headWeight::headWeight( std::shared_ptr tree ) : xmlNode( *tree ){ - parser( false ); - name = "weight"; - for (auto tag : tags ){ - if( tag->getId() == "id" ){ - idTag = tag->getVal().substr(0, tag->getVal().find_last_of("_") - 1 ); - id = std::stoi( std::string( tag->getVal().substr( idTag.size() + 1 ) ) ); - } - } - } - headWeight::headWeight( std::string_view paramSet, std::string& idText, unsigned int idNo, const size_t& begin ) : xmlNode(){ - name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; id = idNo; - } - headWeight::headWeight( std::string_view paramSet, std::string& idText){ - name = "weight"; xmlFile = paramSet; content = paramSet; idTag = idText; - } - void headWeight::setId( std::string identity ){ modded = true; idTag = identity; } - void headWeight::headWriter(){ - if( tags.size() == 0 ){ - if( idTag == "" ){ nodeHeader = ""; return; } - if( id == npos ){ nodeHeader = ""; return; } - nodeHeader = ""; - return; - } - nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; - } - nodeHeader += ">"; - } - void headWeight::headWriter( bool incId ){ - if( !incId ){ headWriter(); return; } - if( idTag == "" ){ headWriter(); return; } - if( id == npos ){ nodeHeader = "getId() == "id" ){ continue; } - nodeHeader += " " + std::string(tag->getId()) + "=\"" + std::string(tag->getVal()) + "\""; - } - nodeHeader += ">"; - } - void headWeight::endWriter() { - nodeEnd = "\n"; - } - void headWeight::contWriter() { - nodeContent = std::string( content ); - } - void headWeight::childWriter() { - for( auto child : children){ - if( child->getName() == "weight" ){ continue; } - nodeContent += *(child->nodeWriter()); - } - } - void headWeight::childWriter( bool hasChildren ){ - if( hasChildren ){ childWriter(); } - } - void headWeight::fullWriter(){ - if( isModded() || !isWritten() ){ - headWriter(); - contWriter(); - childWriter(); - endWriter(); - writtenSelf = 
std::make_shared( nodeHeader + nodeContent + nodeEnd ); - written = true; - modded = false; - } - } - void headWeight::fullWriter( bool incId, bool hasChildren ){ - if( isModded() || !isWritten() ){ - headWriter( incId ); - contWriter(); - childWriter( hasChildren ); - endWriter(); - writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); - modded = false; - written = true; - } - } - - // ZW: struct for handling rwgt groups - // in the LHE header initrwgt node - bool weightGroup::getIncId(){ return includeId; } - void weightGroup::setIncId( bool nuIncId ){ includeId = nuIncId; } - std::vector> weightGroup::getWgts(){ return paramSets; } - void weightGroup::addWgt( headWeight nuWgt ){ modded = true; paramSets.push_back( std::make_shared( nuWgt ) ); if( nuWgt.hasTag() ){ includeId = true; } } - void weightGroup::addWgt( std::shared_ptr nuWgt ){ modded = true; paramSets.push_back( nuWgt); if( nuWgt->hasTag() ){ includeId = true; }} - weightGroup::weightGroup() : xmlNode(){ name = "weightgroup"; return; } - weightGroup::weightGroup( std::vector> nuWgts ) : xmlNode(){ name = "weightgroup"; paramSets = nuWgts; for( auto wgt : nuWgts ){ if( wgt->hasTag() ){ includeId = true; } } } - weightGroup::weightGroup( std::vector nuWgts ) : xmlNode(){ - name = "weightgroup"; - for( auto wgt : nuWgts ){ - paramSets.push_back( std::make_shared( wgt ) ); - } - for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } - } - weightGroup::weightGroup( xmlNode& wgtNode ) : xmlNode( wgtNode ){ - parser( true ); - name = "weightgroup"; - paramSets.reserve( children.size() ); - for( auto child : children ){ - if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } - } - for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } - } - weightGroup::weightGroup( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ - parser( true ); - name = "weightgroup"; - paramSets.reserve( children.size() ); - for( auto child : children ){ - if( 
child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } - } - for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } - } - weightGroup::weightGroup( xmlTree& wgtTree ) : xmlNode( wgtTree ){ - parser( true ); - name = "weightgroup"; - paramSets.reserve( children.size() ); - for( auto child : children ){ - if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } - } - for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } - } - weightGroup::weightGroup( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ - parser( true ); - name = "weightgroup"; - paramSets.reserve( children.size() ); - for( auto child : children ){ - if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } - } - for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } - } - weightGroup::weightGroup( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ - parser( true ); - name = "weightgroup"; - paramSets.reserve( children.size() ); - for( auto child : children ){ - if( child->getName() == "weight" ){ paramSets.push_back( std::make_shared( *child ) ); } - } - for( auto wgt : paramSets ){ if( wgt->hasTag() ){ includeId = true; } } - } - weightGroup::weightGroup( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) - : xmlNode( originFile, begin, childs ){ - name = "weightgroup"; - if( parseTop() ){ - int checker = 0; - for( auto tag : tags ){ - if( tag->getId() == "name" ){ ++checker; rwgtName = tag->getVal(); } - if( tag->getId() == "weight_name_strategy" ){ ++checker; wgtNamStrat = tag->getVal(); - if(wgtNamStrat == "includeIdInWeightName"){ includeId = true; } } - if( checker == 2 ){ break; } - } - } - } - void weightGroup::headWriter() { - nodeHeader = "nodeWriter()); - } - } - void weightGroup::childWriter() { - for(auto child : children){ - if( child->getName() == "weight" ){ continue; } - nodeContent += (*child->nodeWriter()); - } - } - 
void weightGroup::childWriter( bool hasChildren ){ - if( hasChildren ){ childWriter(); } - return; - } - void weightGroup::endWriter() { nodeEnd = "
\n"; } - - std::vector> initRwgt::getGroups(){ return groups; } - size_t initRwgt::noGrps(){ return groups.size(); } - void initRwgt::addGroup( weightGroup nuGroup ){ - modded = true; - auto nuGrpPtr = std::make_shared( nuGroup ); - if( grpInit( nuGrpPtr ) ){ groups.push_back( std::make_shared( nuGroup ) ); } - } - void initRwgt::addGroup( std::shared_ptr nuGroup ){ - modded = true; - if( grpInit( nuGroup ) ){ groups.push_back( nuGroup ); } - } - void initRwgt::addWgt( unsigned int index, std::shared_ptr nuWgt ){ - if( index < groups.size() ){ modded = true; groups[index]->addWgt( nuWgt ); } - else throw std::range_error( "Appending weight to uninitialised weightgroup." ); - } - void initRwgt::addWgt( unsigned int index, headWeight nuWgt ){ - if( index < groups.size() ){ modded = true; groups[index]->addWgt( nuWgt ); } - else throw std::range_error( "Appending weight to uninitialised weightgroup." ); - } - initRwgt::initRwgt() : xmlNode(){ name = "initrwgt"; return; } - initRwgt::initRwgt( std::vector> nuGroups ) : xmlNode(){ - name = "initrwgt"; - for( auto group : nuGroups ){ - groups.push_back( std::make_shared( *group ) ); - } - } - initRwgt::initRwgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ - parser( true ); - name = "initrwgt"; - groups.reserve( children.size() ); - for( auto child : children ){ - groups.push_back( std::make_shared( *child ) ); - } - } - initRwgt::initRwgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ - parser( true ); - name = "initrwgt"; - groups.reserve( children.size() ); - for( auto child : children ){ - groups.push_back( std::make_shared( *child ) ); - } - } - initRwgt::initRwgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ - parser( true ); - name = "initrwgt"; - groups.reserve( children.size() ); - for( auto child : children ){ - groups.push_back( std::make_shared( *child ) ); - } - } - initRwgt::initRwgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ - parser( true ); - name = "initrwgt"; - groups.reserve( children.size() ); - for( auto 
child : children ){ - groups.push_back( std::make_shared( *child ) ); - } - } - bool initRwgt::grpInit( std::shared_ptr& wgt ){ - if( grpIsInit ){ return true; } - else{ - groups = std::vector>( 1, wgt ); - grpIsInit = true; - return false; - } - } - void initRwgt::contWriter(){ - nodeContent = "\n"; - for( auto group : groups ){ - nodeContent += (*group->nodeWriter()); - } - } - void initRwgt::childWriter(){ - for( auto child : children ){ - if( child->getName() == "weightgroup" ){ continue; } - nodeContent += (*child->nodeWriter()); - } - } - void initRwgt::childWriter( bool hasChildren ){ - if( hasChildren ){ childWriter(); } - return; - } - - // ZW: struct for handling weights - // in event blocks of LHE files - void bodyWgt::setComment( std::string_view nuComment ){ modded = true; comment = nuComment; } - void bodyWgt::setVal( std::string nuVal ){ modded = true; valS = nuVal; valD = std::stod(valS);} - void bodyWgt::setVal( std::string_view nuVal ){ modded = true; valS = std::string(nuVal); valD = std::stod(valS);} - void bodyWgt::setVal( double nuVal ){ modded = true; valD = nuVal; valS = std::to_string(valD);} - void bodyWgt::setId( std::string nuId ){ - modded = true; id = nuId; - for( auto tag : tags ){ - if( tag->getId() == "id" ){ tag->setVal( id ); return; } - } - addTag( std::make_shared( "id", id ) ); - } - void bodyWgt::setModded( bool nuModded ){ modded = nuModded; } - std::string_view bodyWgt::getComment(){ return comment; } - std::string_view bodyWgt::getValS(){ return valS; } - double bodyWgt::getValD(){ return valD; } - bodyWgt::bodyWgt() : xmlNode(){ return; } - bodyWgt::bodyWgt( std::string_view value ) : xmlNode() { setVal( value ); modded = false; } - bodyWgt::bodyWgt( double value ) : xmlNode() { setVal( value ); modded = false; } - bodyWgt::bodyWgt( std::string_view value, xmlTag rwgtId ) : xmlNode() { setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } - bodyWgt::bodyWgt( double value, xmlTag rwgtId ) : xmlNode() { 
setVal( value ); addTag( std::make_shared(rwgtId) ); modded = false; } - bodyWgt::bodyWgt( std::string_view value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } - bodyWgt::bodyWgt( double value, std::shared_ptr rwgtId ) : xmlNode() { setVal( value ); addTag( rwgtId ); modded = false; } - bodyWgt::bodyWgt( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) - : xmlNode( originFile, begin, childs ){ - auto strtPt = originFile.find_first_not_of(" >+", originFile.find(">", begin)+1); - valS = originFile.substr( strtPt, originFile.find(" ", strtPt) - strtPt ); - valD = std::stod( valS ); - } - bodyWgt::bodyWgt( xmlNode& wgtNode ) : xmlNode( wgtNode ){ - parser( true ); - valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - valD = std::stod( valS ); - } - bodyWgt::bodyWgt( xmlNode* wgtNode ) : xmlNode( *wgtNode ){ - parser( true ); - valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - valD = std::stod( valS ); - } - bodyWgt::bodyWgt( std::shared_ptr wgtNode ) : xmlNode( *wgtNode ){ - parser( true ); - valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - valD = std::stod( valS ); - } - bodyWgt::bodyWgt( xmlTree& wgtTree ) : xmlNode( wgtTree ){ - parser( true ); - valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - valD = std::stod( valS ); - } - bodyWgt::bodyWgt( xmlTree* wgtTree ) : xmlNode( *wgtTree ){ - parser( true ); - valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - valD = std::stod( valS ); - } - bodyWgt::bodyWgt( std::shared_ptr wgtTree ) : xmlNode( *wgtTree ){ - parser( true ); - valS = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - valD = std::stod( valS ); - } - 
bodyWgt::bodyWgt( double value, std::string& idTag ){ - setVal( value ); - id = idTag; - addTag( std::make_shared("id",id) ); - } - void bodyWgt::appendWgt( std::shared_ptr document ){ - if( !isWritten() ){ fullWriter(); } - *document += *writtenSelf; - } - void bodyWgt::appendWgt( std::string* document ){ - if( !isWritten() ){ fullWriter(); } - *document += *writtenSelf; - } - std::shared_ptr bodyWgt::appendWgt( std::string_view document ){ - if(!isWritten() ){ fullWriter(); } - auto retDoc = std::make_shared( document ); - *retDoc += *writtenSelf; - return retDoc; - } - void bodyWgt::fullWriter() { - writtenSelf = std::make_shared( "getId()) + "=\"" + std::string(tag->getVal()) + "\""; - } - *writtenSelf += ">" + std::string(valS) + "\n"; - modded = false; - written = true; - } - - // ZW: fcn for finding the next block in SLHA format - // parameter cards - size_t blockFinder( std::string_view parseFile, size_t startPt = 0 ){ - if( parseFile.size() > 5 ){ if( clStringComp(parseFile.substr(0,5), std::string("block") )){ return size_t(0); } } - return clStringFind( parseFile, std::string("\nblock"), startPt ); - } - - // ZW: fcn for finding each decay line in SLHA format - // parameter card - std::vector decBlockStractor( std::string_view parseFile ){ - auto allDs = nuFindEach( parseFile, "\nd" ); - std::vector decLines; - decLines.reserve( allDs->size() ); - for( auto pos : *allDs ) - { - if( !(clStringComp(parseFile.substr( pos+1, 5 ), std::string("decay"))) ){ continue; } - decLines.push_back( parseFile.substr( pos + 1, parseFile.find( "\n", pos + 1 ) - pos - 1 ) ); - } - return decLines; - } - - // ZW: fcn for extracting the relevant lines of - // a block in SLHA format parameter card - // removes any comments between start of this block and next - // and also ignores lines with other information, - // eg DECAY lines - std::vector blockLineStractor( std::string_view parseFile, size_t startPt = 0){ - auto blockStrt = blockFinder( parseFile, startPt ); - auto 
newBlock = blockFinder( parseFile, blockStrt + 1 ); - std::vector paramLines; - paramLines.reserve( nuStrCount( parseFile, "\n" ) ); - std::shared_ptr> parLines; - if( newBlock == npos ){ parLines = nuLineSplitter( parseFile.substr( blockStrt ) ); } - else{ parLines = nuLineSplitter( parseFile.substr( blockStrt, newBlock - blockStrt ) ); } - for( auto line : *parLines ) - { - if( line.size() == 0 ){ continue; } - if( line[0] != ' ' ){ continue; } - paramLines.push_back( line ); - } - return paramLines; - } - - // ZW: struct for handling the first line of - // LHE format event block - std::string_view evHead::getComment(){ return comment; } - std::string_view evHead::getWeight(){ return weight; } - std::string_view evHead::getScale(){ return scale; } - std::string_view evHead::getAQED(){ return aqed; } - std::string_view evHead::getAQCD(){ return aqcd; } - std::string_view evHead::getNprt(){ return nprt; } - std::string_view evHead::getProcID(){ return procid; } - bool evHead::isModded(){ return modded; } - bool evHead::isWritten(){ return written; } - void evHead::setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } - void evHead::setWeight( std::string_view nuWgt ){ modded = true; weight = nuWgt; } - void evHead::setScale( std::string_view nuScale ){ modded = true; scale = nuScale; } - void evHead::setAQED( std::string_view nuAQED ){ modded = true; aqed = nuAQED; } - void evHead::setAQCD( std::string_view nuAQCD ){ modded = true; aqcd = nuAQCD; } - void evHead::setNprt( std::string_view nuNprt ){ modded = true; nprt = nuNprt; } - void evHead::setNprt( int nuNprt ){ modded = true; nprtint = nuNprt; nprtstr = std::to_string(nuNprt); nprt = nprtstr;} - void evHead::setProcID( std::string_view nuProcID ){ modded = true; procid = nuProcID; } - std::shared_ptr evHead::getContent(){ - if( !isWritten() || isModded() ){ writer(); } - return content; - } - evHead::evHead(){ return; } - evHead::evHead( const std::string_view originFile, size_t beginLine, 
size_t endLine ) - { - if( originFile.size() == 0){ return; } - beginLine = originFile.find_first_not_of("\n \r\f\t\v", beginLine); - if( endLine == npos ){ endLine = originFile.find("\n", beginLine ) + 1; } - sourceFile = originFile.substr( beginLine, endLine - beginLine ); - auto evLine = nuWordSplitter( sourceFile ); - nprt = evLine->at(0) ; - procid = evLine->at(1); - weight = evLine->at(2); - scale = evLine->at(3); - aqed = evLine->at(4); - aqcd = evLine->at(5); - } - void evHead::writer(){ - if( isWritten() && !isModded() ){ return; } - if( !isModded() ){ content = std::make_shared( sourceFile ); return; } - auto retText = std::make_shared( " " ); - *content = " " + std::string( nprt ); - for( size_t k = 0 ; k < 8 - procid.length() ; ++k ){ *content += " "; } - *content += std::string( procid ) + " " + std::string( weight ) + " " + std::string( scale ) + " " + std::string( aqed ) + " " + std::string( aqcd ); - if( comment != "" ){ *content += " # " + std::string( comment ); } - *content += "\n"; - modded = false; - written = true; - } - - // ZW: struct for handling particle lines - // in LHE format event block - std::string_view lhePrt::getLine(){ return sourceFile; } - std::string_view lhePrt::getComment(){ return comment; } - std::vector lhePrt::getMom(){ return std::vector( std::begin( mom ), std::end( mom ) ); } - std::string_view lhePrt::getE(){ return energy; } - std::string_view lhePrt::getMass(){ return mass; } - std::string_view lhePrt::getVTim(){ return vtim; } - std::string_view lhePrt::getSpin(){ return spin; } - std::string_view lhePrt::getPDG(){ return pdg; } - std::string_view lhePrt::getStatus(){ return status; } - std::vector lhePrt::getMothers(){ return std::vector( std::begin( mothers ), std::end( mothers ) ); } - std::vector lhePrt::getColor(){ return std::vector( std::begin( icol ), std::end( icol ) ); } - void lhePrt::setComment( std::string_view nuCom ){ modded = true; comment = nuCom; } - void lhePrt::setMom( std::vector nuMom ){ 
modded = true; mom[0] = nuMom[0]; mom[1] = nuMom[1]; mom[2] = nuMom[2]; } - void lhePrt::setEnergy( std::string_view nuE ){ modded = true; energy = nuE; } - void lhePrt::setMass( std::string_view nuM ){ modded = true; mass = nuM; } - void lhePrt::setVTim( std::string_view nuVTim ){ modded = true; vtim = nuVTim; } - void lhePrt::setSpin( std::string_view nuSpin ){ modded = true; spin = nuSpin; } - void lhePrt::setPDG( std::string_view nuPDG ){ modded = true; pdg = nuPDG; } - void lhePrt::setStatus( std::string_view nuSt ){ modded = true; status = nuSt; } - void lhePrt::setMothers( std::vector nuMum ){ modded = true; mothers[0] = nuMum[0]; mothers[1] = nuMum[1]; } - void lhePrt::setColors( std::vector nuCol ){ modded = true; icol[0] = nuCol[0]; icol[1] = nuCol[1]; } - bool lhePrt::isModded(){ return modded; } - bool lhePrt::isWritten(){ return written; } - std::shared_ptr lhePrt::getContent(){ - if( !isWritten() || isModded() ){ writer(); } - return content; - } - lhePrt::lhePrt(){ return; } - lhePrt::lhePrt( std::pair& prtInfo ){ - status = std::to_string( prtInfo.first ); - pdg = std::to_string( prtInfo.second ); - } - lhePrt::lhePrt( std::pair& prtInfo ){ - status = std::string_view( prtInfo.first ); - pdg = std::string_view( prtInfo.second ); - } - lhePrt::lhePrt( const std::string_view originFile, const size_t& beginLine, const size_t& endLine ) - { - sourceFile = originFile.substr( beginLine, endLine - beginLine ); - auto evLine = nuWordSplitter( sourceFile ); - pdg = evLine->at(0); - status = evLine->at(1); - mothers[0] = evLine->at(2); mothers[1] = evLine->at(3); - icol[0] = evLine->at(4); icol[1] = evLine->at(5); - for( int k = 6 ; k < 9 ; ++k){ - mom[k-6] = evLine->at(k); - } - energy = evLine->at(9); - mass = evLine->at(10); - vtim = evLine->at(11); - spin = evLine->at(12); - if( evLine->size() > 13 ){ comment = sourceFile.substr( sourceFile.find( "#" ) ); } - } - void lhePrt::writer(){ - if( isWritten() && !isModded() ){ return; } - if( !isModded() ){ 
content = std::make_shared( sourceFile ); return; } - *content = ""; - for( size_t k = 0; k < 10 - pdg.length() ; ++k ){ *content += " "; } - *content += std::string(pdg) + " " + std::string(status); - for( auto mum : mothers ){ *content += " " + std::string( mum ); } - for( auto col : icol ){ *content += " " + std::string( col ); } - for( auto pval : mom ){ *content += " " + std::string(pval); } - *content += " " + std::string( energy ) + " " + std::string( mass ) + " " + std::string( vtim ) + " " + std::string( spin ); - if( comment != "" ){ *content += " # " + std::string( comment ); } - *content += "\n"; - modded = false; - written = true; - } - - // ZW: struct for handling LHE format event block - evHead event::getHead(){ return header; } - std::vector> event::getPrts(){ return prts; } - std::vector> event::getWgts(){ return rwgt; } - void event::setHead( evHead head ){ modded = true; header = head; } - void event::addPrt( std::shared_ptr prtcl ){ modded = true; prts.push_back( prtcl ); } - void event::addPrt( lhePrt prtcl ){ modded = true; prts.push_back( std::make_shared(prtcl) ); } - void event::setPrts( std::vector> prtcls ){ modded = true; prts = prtcls; } - void event::addWgt( bodyWgt nuWgt ){ addedWgt = true; rwgt.push_back( std::make_shared(nuWgt) ); } - void event::addWgt( std::shared_ptr nuWgt ){ modded = true; rwgt.push_back( nuWgt ); } - void event::addWgt( bodyWgt nuWgt, std::string& id ){ addedWgt = true; nuWgt.setId( id ); rwgt.push_back( std::make_shared(nuWgt) ); } - void event::addWgt( std::shared_ptr nuWgt, std::string& id ){ modded = true; nuWgt->setId( id ); rwgt.push_back( nuWgt ); } - bool event::newWeight(){ return addedWgt; } - int event::getNprt(){ return prts.size(); } - bool event::isModded() { return modded; } - bool event::isModded( bool deep ) { - if( !deep ){ return modded; } - bool modStat = modded; - for( auto child : children ){ if(modStat){ return modStat; }; modStat = (modStat || child->isModded( deep )); } - modStat = 
(modStat || header.isModded()); - for( auto prt : prts ){ if(modStat){ return modStat; }; modStat = (modStat || prt->isModded()); } - for( auto wgt : rwgt ){ if(modStat){ return modStat; }; modStat = (modStat || wgt->isModded()); } - return modStat; - } - event::event(){ return; } - event::event( std::vector>& prtInfo ){ - header.setNprt( std::to_string( prtInfo.size() ) ); - for( auto& prt : prtInfo ){ - prts.push_back( std::make_shared( prt ) ); - } - } - event::event( std::vector>& prtInfo ){ - header.setNprt( prtInfo.size() ); - for( auto& prt : prtInfo ){ - prts.push_back( std::make_shared( prt ) ); - } - } - event::event( std::vector> prtInfo ){ - header.setNprt( std::to_string( prtInfo.size() ) ); - prts = prtInfo; - } - event::event( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) - : xmlNode(originFile, begin, childs) { - xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" \n\r\f\t\v", begin+1); - if( trueStart == npos ){ return; } - auto vals = lineFinder( originFile.substr( trueStart, originFile.find("<", trueStart + 3 ) - trueStart + 3 )); - header = evHead(originFile, vals->at(0) + trueStart, vals->at(1) + trueStart + 1 ); - prts.reserve(vals->size()); - for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) - { - prts.push_back( std::make_shared(originFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart + 1) ); - } - } - event::event( const xmlNode& originFile ) - : xmlNode( originFile ) { - size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", start+1); - auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); - header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); - prts.reserve(vals->size()); - for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) - { - prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + 
trueStart) ); - } - } - event::event( const xmlNode* originFile ) - : xmlNode( *originFile ) { - size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); - auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); - header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); - prts.reserve(vals->size()); - for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) - { - prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); - } - } - event::event( const std::shared_ptr& originFile ) - : xmlNode( *originFile ) { - size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); - auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); - header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); - prts.reserve(vals->size()); - for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) - { - prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); - } - } - event::event( xmlTree& originFile ) - : xmlNode( originFile ) { - size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); - auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); - header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); - prts.reserve(vals->size()); - for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) - { - prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); - } - } - event::event( xmlTree* originFile ) - : xmlNode( *originFile ) { - size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); - auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 
)); - header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); - prts.reserve(vals->size()); - for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) - { - prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); - } - } - event::event( std::shared_ptr originFile ) - : xmlNode( *originFile ) { - size_t trueStart = xmlFile.find_first_not_of(" \n\r\f\t\v", structure.getContStart() + 1); - auto vals = lineFinder( xmlFile.substr( trueStart, xmlFile.find("<", trueStart + 3 ) - trueStart + 3 )); - header = evHead(xmlFile, vals->at(0) + trueStart, vals->at(1) + trueStart ); - prts.reserve(vals->size()); - for( int k = 1 ; k < std::stoi(std::string(header.getNprt())) + 1; ++k) - { - prts.push_back( std::make_shared(xmlFile, vals->at(k) + trueStart + 1, vals->at(k+1) + trueStart) ); - } - } - bool event::prtsAreMod(){ - for( auto prt : prts ){ if( prt->isModded() ){ return true; } } - return false; - } - bool event::headIsMod(){ - return header.isModded(); - } - bool event::isSpecSort() const { return specSorted; } - sortFcn event::getSortFcn() const { return eventSort; } - statSort event::getStatSort() const { return specSort; } - bool event::hasRwgt(){ - if( rwgt.size() > 0 ){ return true; } - return false; - } - bool event::rwgtChild(){ - if( childRwgt != nullptr ){ return true; } - for( auto child : children ){ if( clStringComp(child->getName(), std::string("rwgt") ) ){ childRwgt = child; return true; } } - return false; - } - bool event::bothRwgt(){ return (hasRwgt() && rwgtChild() ); } - bool event::eitherRwgt(){ return (hasRwgt() || rwgtChild() ); } - bool event::initProcMap(bool hard) - { - if(!hard){ if( procMap.size() > 0 ){ return true; } } - for( auto prt : prts ){ - procMap.insert({prt->getStatus(), std::vector()}); - procOrder.insert({prt->getStatus(), std::vector()}); - } - for( auto prt : prts ){ - procMap[prt->getStatus()].push_back( prt->getPDG() ); - } - for( auto stat = 
procMap.begin(); stat!= procMap.end(); ++stat ){ - procOrder[stat->first] = *stoiSort( stat->second ); - } - hasBeenProc = true; - return true; - } - bool event::initProcMap( sortFcn sorter, bool hard ) - { - if(!hard){ if( procMap.size() > 0 ){ return true; } } - specSorted = false; - eventSort = sorter; - for( auto prt : prts ){ - procMap.insert({prt->getStatus(), std::vector()}); - procOrder.insert({prt->getStatus(), std::vector()}); - } - for( auto prt : prts ){ - procMap[prt->getStatus()].push_back( prt->getPDG() ); - } - for( auto stat = procMap.begin(); stat!= procMap.end(); ++stat ){ - procOrder[stat->first] = *sorter( stat->second ); - } - hasBeenProc = true; - return true; - } - bool event::initProcMap( statSort sorter, bool hard ) - { - if(!hard){ if( procMap.size() > 0 ){ return true; } } - specSorted = true; - specSort = sorter; - for( auto prt : prts ){ - procMap.insert({prt->getStatus(), std::vector()}); - procOrder.insert({prt->getStatus(), std::vector()}); - } - for( auto prt : prts ){ - procMap[prt->getStatus()].push_back( prt->getPDG() ); - } - for( auto stat = procMap.begin(); stat!= procMap.end(); ++stat ){ - procOrder[stat->first] = *sorter(stat->first, stat->second ); - } - hasBeenProc = true; - return true; - } - bool event::inRwgtChild( std::string_view nameIn ){ - for( auto child : childRwgt->getChildren() ){ - for( auto tag : child->getTags() ){ if(clStringComp(tag->getVal(), nameIn)){ return true; } } - } - return false; - } - bool event::checkRwgtOverlap(){ - for( auto wgt : rwgt ){ - for( auto tag : wgt->getTags() ){ if( inRwgtChild( tag->getVal() ) ){ return true; } } - } - return false; - } - void event::childRwgtWriter(){ - if( rwgtChild() ){ nodeContent += *childRwgt->nodeWriter(); } - } - void event::vecRwgtWriter( bool midNode ){ - if( !midNode ){ nodeContent += "\n"; } - for( auto wgt : rwgt ){ - nodeContent += *wgt->nodeWriter(); - } - nodeContent += "\n"; - } - void event::rwgtWriter(){ - if( bothRwgt() ){ if( 
checkRwgtOverlap() ){ childRwgtWriter(); return; } - childRwgtWriter(); - nodeContent.erase( nodeContent.size() - 8, 8 ); - vecRwgtWriter(); - return; - } else { - if( hasRwgt() ){ vecRwgtWriter(); return; } - if( rwgtChild() ){ childRwgtWriter(); return; } - } - } - void event::contWriter() { - nodeContent = "\n" + *header.getContent(); - for( auto prt : prts ){ - nodeContent += *prt->getContent(); - } - } - void event::childWriter() { - for( auto child : children ){ - if( clStringComp( child->getName(), std::string("wgt") ) ){ continue; } - nodeContent += *child->nodeWriter(); - } - } - void event::fullWriter() { - if( isModded( false ) ){ - headWriter(); - contWriter(); - childWriter(); - rwgtWriter(); - endWriter(); - writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); - modded = false; - } else if( !isWritten() ){ - writtenSelf = std::make_shared( xmlFile.substr( start, end - start ) ); - written = true; - } - } - void event::fullWriter( bool deep ){ - if( !deep ){ fullWriter(); return; } - if( isModded( true ) ){ - headWriter(); - contWriter(); - childWriter(); - rwgtWriter(); - endWriter(); - writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); - modded = false; - written = true; - } else if( !isWritten() ){ - writtenSelf = std::make_shared( xmlFile.substr( start, end - start ) ); - written = true; - } - } - void event::appendWgts(){ - if( !addedWgt ){ return; } - writtenSelf->erase( writtenSelf->size() - 17, 17 ); - for( auto wgt : rwgt ){ - if( !wgt->isWritten() ){ wgt->appendWgt( writtenSelf ); } - } - *writtenSelf += "\n
\n"; - } - std::shared_ptr event::nodeWriter() { - if( isModded(false) || !isWritten() ){ fullWriter(); return writtenSelf; } - if( addedWgt ){ appendWgts(); } - return writtenSelf; - } - std::shared_ptr event::nodeWriter( bool recursive ){ - if( isModded( recursive ) || !isWritten() ){ fullWriter(); return writtenSelf; } - if( addedWgt ){ appendWgts(); } - return writtenSelf; - } - std::map> &event::getProc(){ - if( initProcMap() ){ return procMap; } - else throw std::runtime_error("Error while parsing event node."); - } - std::map> &event::getProcOrder(){ - if( initProcMap() ){ return procOrder; } - else throw std::runtime_error("Error while parsing event node."); - } - std::map> event::getProc() const { - if ( hasBeenProc ){ return procMap; } - else throw std::runtime_error("Const declaration of event node before it has been procesed."); - } - std::map> event::getProcOrder() const { - if ( hasBeenProc ){ return procOrder; } - else throw std::runtime_error("Const declaration of event node before it has been procesed."); - } - std::map> &event::getProc(sortFcn sorter){ - if( initProcMap(sorter) ){ return procMap; } - else throw std::runtime_error("Error while parsing event node."); - } - std::map> &event::getProcOrder(sortFcn sorter){ - if( initProcMap(sorter) ){ return procOrder; } - else throw std::runtime_error("Error while parsing event node."); - } - std::map> &event::getProc(statSort sorter){ - if( initProcMap(sorter) ){ return procMap; } - else throw std::runtime_error("Error while parsing event node."); - } - std::map> &event::getProcOrder(statSort sorter){ - if( initProcMap(sorter) ){ return procOrder; } - else throw std::runtime_error("Error while parsing event node."); - } - - event& makeEv( std::vector>& particles ){ - static auto returnEvent = event( particles ); - return returnEvent; - } - - std::vector> getParticles( event& ev ){ - return ev.getPrts(); - } - - // ZW: struct for handling the first line of - // LHE format init tag - bool 
lheInitHead::isWritten(){ return written; } - bool lheInitHead::isModded(){ return modded; } - std::shared_ptr lheInitHead::getContent(){ - if( isModded() || !isWritten() ){ writer(); } - return content; } - lheInitHead::lheInitHead( std::string_view initHead ){ - auto vals = *nuBlankSplitter( initHead ); - if( vals.size() < 10 ){ return; } - idbmup[0] = vals[0]; idbmup[1] = vals[1]; - ebmup[0] = vals[2]; ebmup[1] = vals[3]; - pdfgup[0] = vals[4]; pdfgup[1] = vals[5]; - pdfsup[0] = vals[6]; pdfsup[1] = vals[7]; - idwtup = vals[8]; nprup = vals[9]; - } - lheInitHead::lheInitHead( xmlNode& initNode ) - { - if( initNode.getName() != "init" ){ return; } - auto startPos = initNode.getFile().find( ">", initNode.getStart() ) + 1; - auto endPos = initNode.getFile().find( "\n", startPos ); - auto vals = *nuBlankSplitter( initNode.getFile().substr( startPos, endPos - startPos ) ); - idbmup[0] = vals[0]; idbmup[1] = vals[1]; - ebmup[0] = vals[2]; ebmup[1] = vals[3]; - pdfgup[0] = vals[4]; pdfgup[1] = vals[5]; - pdfsup[0] = vals[6]; pdfsup[1] = vals[7]; - idwtup = vals[8]; nprup = vals[9]; - } - void lheInitHead::writer(){ - *content = std::string(idbmup[0]) + " " + std::string(idbmup[1]) + " " + std::string(ebmup[0]) + " " + std::string(ebmup[1]) + " " + std::string(pdfgup[0]) - + " " + std::string(pdfgup[1]) + " " + std::string(pdfsup[0]) + " " + std::string(pdfsup[1]) + " " + std::string(idwtup) + " " + std::string(nprup) +"\n"; - written = true; - modded = false; - } - - // ZW: struct for handling process lines - // in LHE format init tag - bool lheInitLine::isWritten(){ return written; } - bool lheInitLine::isModded(){ return modded; } - std::shared_ptr lheInitLine::getContent(){ - if( isModded() || !isWritten() ){ writer(); } - return content; } - lheInitLine::lheInitLine(){} - lheInitLine::lheInitLine( std::string_view procLine ) - { - auto vals = *nuBlankSplitter( procLine ); - if( vals.size() < 4 ){ return; } - xsecup = vals[0]; - xerrup = vals[1]; - xmaxup = vals[2]; 
- lprup = vals[3]; - } - void lheInitLine::writer(){ - *content = std::string(xsecup) + " " + std::string(xerrup) + " " + std::string(xmaxup) + " " + std::string(lprup) + "\n"; - written = true; - modded = false; - } - - // ZW: struct for handling single parameter line in - // SLHA format parameter card - void paramVal::parse(){ - id = std::stoi( std::string(idStr) ); - value = std::stod( std::string(valStr) ); - } - paramVal::paramVal(){ realLine = ""; idStr = ""; valStr = ""; } - paramVal::paramVal( std::string_view paramLine, bool parseOnline ) - { - if( paramLine.find("\n") != npos ){ - auto startPos = paramLine.find_first_not_of(" \n", paramLine.find("\n")); - if( startPos!= npos ){ - auto endPos = paramLine.find("\n", startPos); - realLine = paramLine.substr(startPos, endPos - startPos - 1); - } else{ - realLine = paramLine.substr( 0, paramLine.find("\n") - 1 ); - } - } - realLine = paramLine; - auto vals = *nuBlankSplitter( realLine ); - idStr = vals[0]; - valStr = vals[1]; - if( parseOnline ){ - if( vals.size() > 2 ) - { - auto comStart = realLine.find("#"); - comStart = realLine.find_first_not_of( " #", comStart ); - comment = realLine.substr( comStart, realLine.find("\n", comStart) - comStart ); - } - parse(); } - } - bool paramVal::isMod(){ return modded; } - std::shared_ptr paramVal::selfWrite(){ - auto writeVal = std::make_shared(""); - if( isMod() ) - { - for( int k = idStr.size() ; k < 5 ; ++k ){ *writeVal += " "; } - *writeVal += std::string( idStr ) + " " + std::string( valStr ); - if( comment.size() != 0 ){ - *writeVal += " # " + std::string( comment ); - } - *writeVal += "\n"; - } - else{ *writeVal = std::string( realLine ) + "\n"; } - return writeVal; - } - - // ZW: struct for handling single DECAY line - // in SLHA format parameter card - void decVal::parse() { - auto vals = *nuBlankSplitter( realLine ); - id = std::stoi( std::string(vals[1]) ); - value = std::stod( std::string(vals[2]) ); - if( vals.size() > 3 ) - { - auto comStart = 
realLine.find("#"); - comment = realLine.substr( comStart, realLine.find("\n", comStart) - comStart ); - } - } - decVal::decVal( std::string_view paramLine, bool parseOnline ) : paramVal( paramLine, false ) - { - if( parseOnline ){ parse(); } - } - std::shared_ptr decVal::selfWrite() { - auto writeVal = std::make_shared(""); - if( isMod() ) - { - *writeVal += "DECAY " + std::string( idStr ) + " " + std::string( valStr ); - if( comment.size() != 0 ){ - *writeVal += " # " + std::string( comment ); - } - *writeVal += "\n"; - } - else{ *writeVal = std::string( realLine ) + "\n"; } - return writeVal; - } - - // ZW: struct for handling parameter block - // in SLHA format parameter card - void paramBlock::parse( bool parseOnline ){ - if( realBlock.size() == 0 ){ return; } - if( !(clStringComp(realBlock.substr(startPt+1, 5), std::string("block"))) ){ startPt = clStringFind( realBlock, std::string("\nblock") ); } - auto namePt = realBlock.find_first_not_of( " ", startPt + 7 ); - name = realBlock.substr( namePt, realBlock.find_first_of( " \n", namePt ) - namePt ); - if( realBlock.find( " ", namePt ) < realBlock.find( "\n", namePt ) ) - {comment = realBlock.substr( namePt + name.size(), realBlock.find( "\n", namePt ) - namePt - name.size() ); } - auto paramLines = blockLineStractor( realBlock.substr( startPt ) ); - params.reserve( paramLines.size() ); - for( auto line : paramLines ) - { - params.push_back( paramVal( line, parseOnline ) ); - } - } - paramBlock::paramBlock(){ return; } - paramBlock::paramBlock( std::string_view paramSet, bool parseOnline ) - { - realBlock = paramSet; - startPt = clStringFind( realBlock, std::string("\nB") ); - if( parseOnline ){ parse(parseOnline); } - } - bool paramBlock::isMod(){ return modded; } - std::shared_ptr paramBlock::selfWrite(){ - auto writeBlock = std::make_shared(""); - if( isMod() ) - { - *writeBlock += "\nBLOCK " + std::string(name); - if( comment.size() > 0 ){ - *writeBlock += " # " + std::string( comment ); - } - *writeBlock 
+= "\n"; - for ( auto val : params ) - { - *writeBlock += *val.selfWrite(); - } - } - else{ if( startPt == npos ){ - *writeBlock += realBlock; - } else { - *writeBlock = realBlock.substr( startPt ); - } } - return writeBlock; - } - - // ZW: struct for handling DECAY lines - // in SLHA format parameter card - void decBlock::parse( bool parseOnline ){ - if( realBlock.size() == 0 ){ return; } - auto decLines = clFindEach( realBlock, std::string("\ndecay") ); - decays.reserve(decLines->size()); - if( realBlock.size() > 5 ){ if( clStringComp( realBlock.substr(0,5), std::string("decay")) ) - { decays.push_back( decVal(realBlock.substr( 0, realBlock.find("\n") ), parseOnline) ); } } - for( auto pts : *decLines ) - { - auto lineBr = realBlock.find( "\n", pts + 1 ); - if( lineBr == npos ){ decays.push_back( decVal( realBlock.substr( pts + 1), parseOnline ) ); continue; } - decays.push_back( decVal( realBlock.substr( pts + 1, lineBr - pts - 1 ), parseOnline ) ); - } - } - void decBlock::parse( std::shared_ptr> decLines, bool parseOnline ) { - decays.reserve(decLines->size()); - if( realBlock.size() > 5 ){ if( clStringComp( realBlock.substr(0,5), std::string("decay")) ) - { decays.push_back( decVal(realBlock.substr( 0, realBlock.find("\n") ), parseOnline) ); } } - for( auto pts : *decLines ) - { - auto lineBr = realBlock.find( "\n", pts + 1 ); - if( lineBr == npos ){ decays.push_back( decVal( realBlock.substr( pts + 1), parseOnline ) ); continue; } - decays.push_back( decVal( realBlock.substr( pts + 1, lineBr - pts - 1 ), parseOnline ) ); - } - } - decBlock::decBlock( std::string_view paramSet, bool parseOnline ) : paramBlock( paramSet, parseOnline ) - { - realBlock = paramSet; - if( parseOnline ){ parse(parseOnline); } - } - std::shared_ptr decBlock::selfWrite() { - auto writeBlock = std::make_shared(""); - *writeBlock += "\n"; - for ( auto val : decays ) - { - *writeBlock += *val.selfWrite(); - } - return writeBlock; - } - - // ZW: struct for handling SLHA parameter cards - 
void lesHouchesCard::parse( bool parseOnline ) - { - if( parsed ){ return; } - if( xmlFile.substr(start,1).find_first_of("BbDd#") == npos ){ start = clStringFindIf( xmlFile, std::string("\n"), lambdaNu ); } - auto blockPts = clFindEach( xmlFile, std::string("\nblock") ); - auto decLines = clFindEach( xmlFile, std::string("\ndecay") ); - header = xmlFile.substr( start, std::min( blockPts->at(0), decLines->at(0) ) - start ); - for( size_t k = 0 ; k < blockPts->size() - 1 ; ++k ) - { - blocks.push_back( paramBlock( xmlFile.substr( blockPts->at(k), blockPts->at(k+1) - blockPts->at(k) ), parseOnline ) ); - } - blocks.push_back(paramBlock(xmlFile.substr(blockPts->at(blockPts->size()-1), clStringFindIf( xmlFile, std::string("\n"), - lambda, blockPts->at(blockPts->size()-1) + 1) - blockPts->at(blockPts->size()-1)), parseOnline)); - decays = decBlock( xmlFile ); - decays.parse( decLines, parseOnline ); - parsed = true; - } - lesHouchesCard::lesHouchesCard( const std::string_view originFile, const size_t& begin, bool parseOnline ){ - xmlFile = originFile; start = begin; - modded = false; blockStart = clStringFindIf( xmlFile, std::string("\n"), lambda, start + 1); end = xmlFile.find(" lesHouchesCard::selfWrite(){ - auto writeCard = std::make_shared(header); - if( isMod() ) - { for( auto block : blocks ) - { *writeCard += *block.selfWrite(); } - *writeCard += *decays.selfWrite(); } - else{ - if( end != npos ){ *writeCard += std::string( xmlFile.substr( blockStart, end - blockStart ) ); - } else{ *writeCard += std::string( xmlFile.substr( blockStart ) ); } - } - return writeCard; - } - - std::shared_ptr slhaNode::getParameters(){ - modded = true; - return parameterCard; - } - slhaNode::slhaNode() : xmlNode(){} - slhaNode::slhaNode( lesHouchesCard parameters ) : xmlNode(){ - parameterCard = std::make_shared( parameters ); - pCardInit = true; - } - slhaNode::slhaNode( std::shared_ptr parameters ) : xmlNode(){ - parameterCard = parameters; - pCardInit = true; - } - 
slhaNode::slhaNode( xmlNode& node, bool parseOnline ) : xmlNode( node ){ - parameterCard = std::make_shared( node.getFile(), node.getStart(), parseOnline ); - } - slhaNode::slhaNode( xmlNode* node, bool parseOnline ) : xmlNode( *node ){ - parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); - } - slhaNode::slhaNode( std::shared_ptr node, bool parseOnline ) : xmlNode( *node ){ - parameterCard = std::make_shared( node->getFile(), node->getStart(), parseOnline ); - } - slhaNode::slhaNode( xmlTree tree, bool parseOnline ) : xmlNode( tree ){ - parameterCard = std::make_shared( tree.getOrigin(), tree.getStart(), parseOnline ); - } - slhaNode::slhaNode( std::shared_ptr tree, bool parseOnline ) : xmlNode( *tree ){ - parameterCard = std::make_shared( tree->getOrigin(), tree->getStart(), parseOnline ); - } - slhaNode::slhaNode( xmlTree* tree, bool parseOnline ) : xmlNode( *tree ){ - parameterCard = std::make_shared( tree->getOrigin(), tree->getStart(), parseOnline ); - } - slhaNode::slhaNode( const std::string_view originFile, const size_t& begin, bool parseOnline ) - : xmlNode( originFile, begin ){ - if( parse() ){ parameterCard = std::make_shared( content, begin, parseOnline ); pCardInit = true; } - } - void slhaNode::headWriter(){ - nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; - } - nodeHeader += ">"; - } - void slhaNode::endWriter(){ nodeEnd += "\n"; } - void slhaNode::contWriter(){ - if( pCardInit ){ - nodeContent = *parameterCard->selfWrite(); - } else { - nodeContent = content; - } - } - - // ZW: struct for handling LHE init nodes - std::shared_ptr initNode::getHead(){ return initHead; } - std::vector> initNode::getLines(){ return initLines; } - void initNode::setHead( std::shared_ptr head ){ modded = true; initHead = head; } - void initNode::setLines( std::vector> lines ){ modded = true; initLines = lines; initHead->nprup = std::to_string( initLines.size() ); } - void initNode::addLine( std::shared_ptr line ){ 
modded = true; initLines.push_back( line ); initHead->nprup = std::to_string( initLines.size() ); } - initNode::initNode() : xmlNode(){ name = "init"; } - initNode::initNode( const std::string_view originFile, const size_t& begin, bool parseOnline ) - : xmlNode( originFile, begin ){ - content = originFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - if( parseOnline ){ parse( parseOnline ); } - } - initNode::initNode( xmlNode& node, bool parseOnline ) : xmlNode( node ){ - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - if( parseOnline ){ parse( parseOnline ); } - } - initNode::initNode( xmlNode* node, bool parseOnline ) : xmlNode( *node ){ - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - if( parseOnline ){ parse( parseOnline ); } - } - initNode::initNode( std::shared_ptr node, bool parseOnline ) : xmlNode( *node ){ - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - if( parseOnline ){ parse( parseOnline ); } - } - initNode::initNode( xmlTree tree, bool parseOnline ) : xmlNode( tree ){ - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - if( parseOnline ){ parse( parseOnline ); } - } - initNode::initNode( std::shared_ptr tree, bool parseOnline ) : xmlNode( *tree ){ - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - if( parseOnline ){ parse( parseOnline ); } - } - initNode::initNode( xmlTree* tree, bool parseOnline ) : xmlNode( *tree ){ - content = xmlFile.substr( structure.getContStart(), structure.getContEnd() - structure.getContStart() ); - if( parseOnline ){ parse( parseOnline ); } - } - bool initNode::parseContent(){ - if( content.size() == 0 ){ return false; } - auto linebreaks = lineFinder( content ); - if( linebreaks->size() == 0 ){ 
return false; } - initHead = std::make_shared(content.substr( 0, linebreaks->at(0) ) ); - for( size_t k = 0 ; k < linebreaks->size() - 1 ; ++k ){ - initLines.push_back( std::make_shared( content.substr( linebreaks->at(k), linebreaks->at(k+1) - linebreaks->at(k) ) ) ); - } - return true; - } - void initNode::contWriter(){ - if( isModded() ){nodeContent = std::string( content ); return; } - nodeContent = *initHead->getContent(); - for( auto line : initLines ){ - nodeContent += *line->getContent(); - } - } - - // ZW: struct for explicitly handling LHE header nodes - size_t lheHead::addWgtGroup( std::shared_ptr& wgtGroup ){ - hasRwgt = true; - modded = true; - if( wgtGrpInit( wgtGroup ) ){ - rwgtNodes->addGroup( wgtGroup ); - } - return (rwgtNodes->noGrps() - 1); - } - size_t lheHead::addWgtGroup( weightGroup wgtGroup ){ - hasRwgt = true; - modded = true; - auto wgtGrpPtr = std::make_shared( wgtGroup ); - if( wgtGrpInit( wgtGrpPtr ) ){ - rwgtNodes->addGroup( std::make_shared( wgtGroup ) ); - } - return (rwgtNodes->noGrps() - 1); - } - void lheHead::addWgt( size_t index, std::shared_ptr nuWgt ){ - if( index >= (size_t)rwgtNodes->getGroups().size() ) - throw std::range_error( "Appending weight to uninitialised weightgroup." ); - hasRwgt = true; - modded = true; - rwgtNodes->addWgt( index, nuWgt ); - } - void lheHead::addWgt( size_t index, headWeight nuWgt ){ - if( index >= (size_t)rwgtNodes->getGroups().size() ) - throw std::range_error( "Appending weight to uninitialised weightgroup." ); - hasRwgt = true; - modded = true; - rwgtNodes->addWgt( index, nuWgt ); - } - void lheHead::addWgt( size_t index, std::shared_ptr nuWgt, std::string idTagg ){ - if( index >= (size_t)rwgtNodes->getGroups().size() ) - throw std::range_error( "Appending weight to uninitialised weightgroup." 
); - hasRwgt = true; - modded = true; - nuWgt->setId( idTagg ); - rwgtNodes->addWgt( index, nuWgt ); - } - void lheHead::addWgt( size_t index, headWeight nuWgt, std::string idTagg ){ - if( index >= (size_t)rwgtNodes->getGroups().size() ) - throw std::range_error( "Appending weight to uninitialised weightgroup." ); - hasRwgt = true; - modded = true; - nuWgt.setId( idTagg ); - rwgtNodes->addWgt( index, nuWgt ); - } - void lheHead::setInitRwgt( initRwgt initWgt ){ hasRwgt = true; modded = true; rwgtNodes = std::make_shared(initWgt); } - void lheHead::setInitRwgt( std::shared_ptr initWgt ){ hasRwgt = true; modded = true; rwgtNodes = initWgt; } - std::vector> lheHead::getWgtGroups(){ return rwgtNodes->getGroups(); } - std::shared_ptr lheHead::getInitRwgt(){ return rwgtNodes; } - std::shared_ptr lheHead::getParameters(){ return parameters; } - void lheHead::setParameters( std::shared_ptr params ){ parameters = params; } - bool lheHead::rwgtInc(){ return hasRwgt; } - lheHead::lheHead(){ return; } - lheHead::lheHead( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) - : xmlNode(originFile, begin, childs){ - xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); - if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} - for( auto child : children ){ - if (child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } - if (child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } - } - } - lheHead::lheHead( xmlNode& node ) : xmlNode(node){ - for( auto child : node.getChildren() ){ - if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } - if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } - } - } - lheHead::lheHead( xmlNode* node ) : xmlNode(*node){ - for( auto child : node->getChildren() ){ - 
if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } - if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } - } - } - lheHead::lheHead( std::shared_ptr node ) : xmlNode( *node ){ - for( auto child : node->getChildren() ){ - if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } - if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } - } - } - lheHead::lheHead( xmlTree tree ) : xmlNode( tree ){ - for( auto child : children ){ - if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } - if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } - } - } - lheHead::lheHead( std::shared_ptr tree ) : xmlNode( *tree ){ - for( auto child : children ){ - if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } - if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } - } - } - lheHead::lheHead( xmlTree* tree ) : xmlNode( *tree ){ - for( auto child : children ){ - if ( child->getName() == "slha" ){ parameters = std::make_shared( *child ); continue; } - if ( child->getName() == "initrwgt" ){ rwgtNodes = std::make_shared( *child ); continue; } - } - } - bool lheHead::wgtGrpInit( std::shared_ptr& wgtGrp ){ - if( wgtGrpIsInit ){ return true; } - if( rwgtNodes == nullptr ){ - rwgtNodes = std::make_shared(); - wgtGrpIsInit = true; - rwgtNodes->addGroup( wgtGrp ); - return false; - } else throw std::runtime_error( "Error while initiating return LHE file header (initrwgt node is defined in an unrecognised manner)." 
); - } - void lheHead::setRelChild(){ - if( relChildSet ){ return; } - relChild.reserve( children.size() ); - for( size_t k = 0 ; k < children.size() ; ++k ){ - auto child = &children[k]; - if( (*child)->getName() == "slha" ){ continue; } - if( (*child)->getName() == "initrwgt" ){ continue; } - relChild.push_back( k ); - } - relChildSet = true; - } - bool lheHead::parseChildren( bool recursive ){ - bool status = true; - for( auto child : children ){ - if( child->getName() == "slha" || child->getName() == "initrwgt" ){ continue; } - child->parser( recursive ); - status = (status && child->isParsed() ); - deepParsed = true; - } - return status; - } - void lheHead::headWriter(){ - nodeHeader = "getId()) + "=\"" + std::string(tag->getVal()) + "\""; - } - nodeHeader += ">\n"; - } - void lheHead::childWriter(){ - setRelChild(); - for( auto relKid : relChild ){ - nodeContent += *(children[relKid]->nodeWriter()); - } - if( parameters != nullptr ){ nodeContent += *parameters->nodeWriter(); } - if( hasRwgt ){ - nodeContent += *rwgtNodes->nodeWriter(); - } - } - void lheHead::fullWriter(){ - if( isModded() ){ - headWriter(); - contWriter(); - childWriter(); - endWriter(); - writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); - written = true; - } - } - - // ZW: struct for keeping track of appended weights in LHE node, - // since weight information is stored both in the header - // and in the individual events - newWgt::newWgt( std::shared_ptr heaWgt, std::vector> bodWgts ){ - headWgt = heaWgt; bodyWgts = bodWgts; - } - newWgt::newWgt( std::shared_ptr heaWgt, std::shared_ptr> wgts ){ - headWgt = heaWgt; - bodyWgts = std::vector>(wgts->size()); - auto idTag = std::string(headWgt->getTag()); - if( idTag != "" ){ - for( size_t i = 0 ; i < wgts->size() ; ++i ){ - bodyWgts[i] = std::make_shared(wgts->at(i), idTag); - } - } else{ - for( size_t i = 0 ; i < wgts->size() ; ++i ){ - bodyWgts[i] = std::make_shared(wgts->at(i)); - } - } - } - newWgt::newWgt( 
std::string_view parameters, std::shared_ptr> wgts, std::string idTag ){ - headWgt = std::make_shared(parameters, idTag); - bodyWgts = std::vector>(wgts->size()); - for( size_t i = 0 ; i < wgts->size() ; ++i ){ - bodyWgts[i] = std::make_shared(wgts->at(i), idTag); - } - } - newWgt::newWgt( std::string_view parameters, int idNum, std::shared_ptr> wgts, std::string idTag ){ - std::string newTag = std::string( idTag ) + "_" + std::to_string( idNum ); - headWgt = std::make_shared(parameters, newTag); - bodyWgts = std::vector>(wgts->size()); - for( size_t i = 0 ; i < wgts->size() ; ++i ){ - bodyWgts[i] = std::make_shared(wgts->at(i), newTag); - } - } - newWgt::newWgt( std::string& parameters ){ - headWgt = std::make_shared(parameters); - } - newWgt::newWgt( std::string& parameters, std::string& idTag ){ - headWgt = std::make_shared(parameters, idTag); - } - std::shared_ptr newWgt::getHeadWgt(){ return headWgt; } - std::vector> newWgt::getBodyWgts(){ return bodyWgts; } - void newWgt::addBdyWgts( std::shared_ptr> wgts ){ - auto idTag = std::string(headWgt->getTag()); - if( idTag != "" ){ - for( size_t i = 0 ; i < wgts->size() ; ++i ){ - bodyWgts[i] = std::make_shared(wgts->at(i), idTag); - } - } else{ - for( size_t i = 0 ; i < wgts->size() ; ++i ){ - bodyWgts[i] = std::make_shared(wgts->at(i)); - } - } - } - - // ZW: general struct for handling LHE files explicitly - lheNode::lheNode() : xmlNode(){} - lheNode::lheNode( const std::string_view originFile, const size_t& begin, const std::vector>& childs ) - : xmlNode(originFile, begin, childs){ - //xmlFile = originFile; start = begin; children = childs; size_t trueStart = originFile.find_first_not_of(" ", begin+1); - //if( trueStart != npos ){name = originFile.substr( trueStart, originFile.find_first_of(">/ ", trueStart) - trueStart );} - for( auto child : children ){ - if( child->getName() == "header" ){ header = std::make_shared( *child ); continue; } - if( child->getName() == "init" ){ init = std::make_shared( *child ); 
continue; } - if( child->getName() == "event" ){ events.push_back( std::make_shared( *child ) ); continue; } - } - } - std::shared_ptr lheNode::getHeader(){ return header; } - std::shared_ptr lheNode::getInit(){ return init; } - std::vector> lheNode::getEvents(){ return events; } - bool lheNode::isModded(){ return modded; } - bool lheNode::isModded( bool deep ){ - if( !deep ){ return isModded(); } - bool modStat = isModded(); - for( auto child : children ){ modStat = ( modStat || child->isModded( deep ) ); } - for( auto event : events ){ modStat = ( modStat || event->isModded( deep ) ); } - return modStat; - } - void lheNode::setInit( std::shared_ptr initNod ){ init = initNod; } - void lheNode::setHeader( std::shared_ptr headNod ){ header = headNod; } - void lheNode::addWgt( size_t index, newWgt& addedWgt ){ - header->addWgt( index, addedWgt.getHeadWgt() ); - auto wgtsVec = addedWgt.getBodyWgts(); - for( size_t k = 0 ; k < wgtsVec.size() ; ++k ){ - events[k]->addWgt( wgtsVec[k] ); - } - } - void lheNode::addWgt( size_t index, newWgt& addedWgt, std::string& idTag ){ - header->addWgt( index, addedWgt.getHeadWgt(), idTag ); - auto wgtsVec = addedWgt.getBodyWgts(); - for( size_t k = 0 ; k < wgtsVec.size() ; ++k ){ - events[k]->addWgt( wgtsVec[k] ); - } - } - void lheNode::setRelStats( std::vector& particles ){ - relStat = particles; - } - std::vector& lheNode::getRelStats(){ - return relStat; - } - void lheNode::setSameSort( sortFcn& sortF ){ - particleSort = sortF; - } - sortFcn& lheNode::getSameSort(){ - return particleSort; - } - void lheNode::setStatSort( statSort& statS ){ - statParticleSort = statS; - } - statSort& lheNode::getStatSort(){ - return statParticleSort; - } - void lheNode::headerWriter(){ - nodeContent += "\n" + *header->nodeWriter(); - } - void lheNode::initWriter(){ - nodeContent += *init->nodeWriter(); - } - void lheNode::eventWriter(){ - for( auto event : events ){ - nodeContent += *event->nodeWriter(); - } - } - void lheNode::contWriter(){ - 
nodeContent = ""; - headerWriter(); - initWriter(); - eventWriter(); - } - void lheNode::fullWriter(){ - if( isModded( true ) ){ - headWriter(); - contWriter(); - endWriter(); - writtenSelf = std::make_shared( nodeHeader + nodeContent + nodeEnd ); - written = true; - modded = false; - } else if( !isWritten() ){ - writtenSelf = std::make_shared( xmlFile.substr(start, end - start ) ); - written = true; - } - } - std::shared_ptr lheNode::nodeWriter() { - if( isModded( true ) || !isWritten() ){ fullWriter(); } - return writtenSelf; - } - - // ZW: function for extracting event information from - // LHE files - std::vector>> valExtraction( lheNode& lheFile ) - { - bool getGs = true; - auto momVec = std::make_shared>(); - auto wgtVec = std::make_shared>(); - auto gVec = std::make_shared>(); - auto events = lheFile.getEvents(); - momVec->reserve( events.size() * 4 * std::stoi(std::string(events[0]->getHead().getNprt())) ); - wgtVec->reserve( events.size() ); - gVec->reserve( events.size() ); - if( getGs ){ - for( auto event : events ) - { - wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); - gVec->push_back( std::sqrt( 4.0 * M_PI * std::stod(std::string( event->getHead().getAQCD() )))); - for( auto prt : event->getPrts() ) - { - momVec->push_back(std::stod(std::string(prt->getE()))); - for( int p = 0 ; p < 3 ; ++p ) - { momVec->push_back(std::stod(std::string(prt->getMom()[p]))); } - } - } - } else{ - for( auto event : events ) - { - wgtVec->push_back(std::stod(std::string( event->getHead().getWeight() ))); - gVec->push_back( std::stod(std::string( event->getHead().getAQCD() ))); - for( auto prt : event->getPrts() ) - { - momVec->push_back(std::stod(std::string(prt->getE()))); - for( int p = 0 ; p < 3 ; ++p ) - { momVec->push_back(std::stod(std::string(prt->getMom()[p]))); } - } - - } } - return {momVec, gVec, wgtVec}; - } - - // ZW: fcn for parsing an LHE format event block - // and return a REX format event object - std::shared_ptr evPtrParsor( 
std::string_view parseFile, size_t& initPos, size_t& endPos ) - { - auto currNode = std::make_shared(parseFile, initPos); - initPos = nodeStartFind( parseFile, initPos + 1 ); - while( initPos < endPos ) - { - currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); - } - size_t equalSign = parseFile.find_first_of("=>", initPos); - size_t nodeInitEnd = parseFile.find(">", initPos); - while( equalSign < nodeInitEnd ){ - currNode->addTag( xmlTagParser(parseFile, equalSign) ); - } - initPos = nodeStartFind( parseFile, endPos ); - endPos = nodeEndFind( parseFile, endPos + 1 ); - return currNode; - } - - // ZW: fcn for parsing an LHE format header - // and return a REX format lheHead object - std::shared_ptr lheHeadParser( std::string_view parseFile, size_t& initPos, size_t& endPos ) - { - auto currNode = std::make_shared(parseFile, initPos); - initPos = nodeStartFind( parseFile, initPos + 1 ); - while( initPos < endPos ) - { - currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); - if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "init" ){ continue; } - if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "slha" ){ - currNode->setParameters( std::make_shared(currNode->getChildren()[ currNode->getChildren().size() - 1 ]) ); - } - if( currNode->getChildren()[ currNode->getChildren().size() - 1 ]->getName() == "initrwgt" ){ - currNode->setInitRwgt( std::make_shared( currNode->getChildren()[ currNode->getChildren().size() - 1 ] ) ); - } - } - size_t equalSign = parseFile.find("=", initPos); - size_t nodeInitEnd = parseFile.find(">", initPos); - while( equalSign < nodeInitEnd ){ - currNode->addTag( xmlTagParser(parseFile, equalSign) ); - } - initPos = nodeStartFind( parseFile, endPos ); - endPos = nodeEndFind( parseFile, endPos + 1 ); - return currNode; - } - - // ZW: fcn for parsing an LHE format file - // and return a REX format LHE node object - std::shared_ptr lheParser( std::string_view 
parseFile, size_t& initPos, size_t& endPos ) - { - auto currNode = std::make_shared(parseFile, initPos); - initPos = nodeStartFind( parseFile, initPos + 1 ); - while( initPos < endPos ) - { - if( parseFile.substr( initPos, 6 ) == "getEvents().push_back( evPtrParsor( parseFile, initPos, endPos ) ); - continue; - } else if( parseFile.substr( initPos, 7 ) == "setHeader(lheHeadParser( parseFile, initPos, endPos )); - continue; - } else if( parseFile.substr( initPos, 5 ) == "setInit( std::make_shared( parseFile, initPos ) ); - initPos = nodeStartFind( parseFile, endPos ); - endPos = nodeEndFind( parseFile, nodeEndFind( parseFile, endPos + 1 ) + 1); - continue; - } else { - currNode->addChild(xmlPtrParser( parseFile, initPos, endPos )); - } - } - size_t equalSign = parseFile.find("=", initPos); - size_t nodeInitEnd = parseFile.find(">", initPos); - while( equalSign < nodeInitEnd ){ - currNode->addTag( xmlTagParser(parseFile, equalSign) ); - } - initPos = nodeStartFind( parseFile, endPos ); - endPos = nodeEndFind( parseFile, endPos + 1 ); - return currNode; - } - - // ZW: struct for treating individual HEP - // processes, formatted based on PDG codes - // and the LHE particle status standard - struct lheProc { - public: - std::vector minusOne; - std::vector plusOne; - std::vector minusTwo; - std::vector plusTwo; - std::vector plusThree; - std::vector minusNine; - std::vector orderMOne; - std::vector orderOne; - std::vector orderMTwo; - std::vector orderTwo; - std::vector orderThree; - std::vector orderNine; - std::map> valVecs{{"-1", minusOne}, {"1", plusOne}, {"-2", minusTwo}, {"2", plusTwo}, {"3", plusThree}, {"-9", minusNine}}; - std::map> orderVecs{{"-1", orderMOne}, {"1", orderOne}, {"-2", orderMTwo}, {"2", orderTwo}, {"3", orderThree}, {"9",orderNine}}; - lheProc( event& eventNode ) - { - for( auto prt : eventNode.getPrts() ) - { - valVecs[prt->getStatus()].push_back(prt->getPDG()); - } - for( auto valVec = valVecs.begin() ; valVec!= valVecs.end() ; ++valVec ){ - 
if( valVec->second.size() == 0 ){ continue; } - orderVecs[valVec->first] = *stoiSort( valVec->second ); - } - } - std::shared_ptr writer(){ - auto written = std::make_shared(); - for( auto inits : valVecs["-1"] ){ - written->append(inits); - written->append(" "); - } - if( valVecs["2"].size() > 0 ){ - written->append("> "); - for( auto inits : valVecs["2"] ){ - written->append(inits); - written->append(" "); - } - } - written->append("> "); - for( auto inits : valVecs["1"] ){ - written->append(inits); - written->append(" "); - } - return written; - } - }; - - // ZW: fcn for uploading text files to the program - std::shared_ptr filePuller( const std::string& fileLoc ) - { - std::ifstream fileLoad( fileLoc ); - std::stringstream buffer; - buffer << fileLoad.rdbuf(); - auto fileContent = std::make_shared(buffer.str()); - //std::transform( fileContent->begin(), fileContent->end(), fileContent->begin(), ::tolower ); - buffer.str(std::string()); - fileLoad.close(); - return fileContent; - } - - // ZW: fcn for saving std::string to disk - bool filePusher( std::string fileLoc, std::string fileCont ) - { - std::ofstream fileWrite( fileLoc ); - if(!fileWrite){return false;} - fileWrite << fileCont; - fileWrite.close(); - return true; - } - - // ZW: fcn for extracting the full - // process information from an LHE event - std::shared_ptr>> pdgXtract( event& currEv ) - { - auto currProc = std::make_shared>>(); - auto &useProc = *currProc; - for( auto prt : currEv.getPrts() ) - { - useProc[ prt->getStatus() ].push_back(prt->getPDG()); - } - return currProc; - } - - template - bool chaoticVecComp( const std::vector& vec1, const std::vector order1, const std::vector& vec2, const std::vector order2 ) - { - if( vec1.size()!= vec2.size() ){ return false; } - for( size_t i = 0; i < vec1.size(); i++ ){ - if( vec1[order1[i]]!= vec2[order2[i]] ){ return false; } - } - return true; - } - - // ZW: fcn for comparing two processes in the - // format output by pdgXtract - bool sameProcString( 
std::map>& firstVec, std::map>& secVec, const std::vector& statVec ) - { - if( firstVec.size() != secVec.size() ){return false;} - for(auto code : statVec ) - { - if( firstVec[code] != secVec[code] ){ return false; } - } - return true; - } - - bool sameProcString( std::map>& firstVec, std::map>& firstOrder, - std::map>& secVec, std::map>& secondOrder, - std::vector& statVec ) - { - if( firstVec.size() != secVec.size() ){return false;} - for(auto code : statVec ) - { - if( !chaoticVecComp(firstVec[code], firstOrder[code], secVec[code], secondOrder[code]) ){ return false; } - } - return true; - } - - // ZW: fcn for processes in the lheProc struct format - bool procComp( lheProc& firstProc, lheProc& secProc, std::vector statVec ) - { - for( auto stat : statVec ) - { - if( firstProc.valVecs.at(stat).size() != secProc.valVecs.at(stat).size() ){ return false; } - if( !chaoticVecComp( firstProc.valVecs[stat], firstProc.orderVecs[stat], secProc.valVecs[stat], secProc.orderVecs[stat] ) ){ return false; } - } - return true; - } - - bool evProcComp( event& firstEv, event& secEv, std::vector statVec = {"-1", "1"} ) - { - for( auto stat : statVec ) - { - if( firstEv.getProc()[stat].size()!= secEv.getProc()[stat].size() ){ return false; } - if(!chaoticVecComp( firstEv.getProc()[stat], firstEv.getProcOrder()[stat], - secEv.getProc()[stat], secEv.getProcOrder()[stat] ) ){ return false; } - } - return true; - } - - bool evProcComp( event& firstEv, event& secEv, std::vector statVec, - sortFcn sorter ) - { - for( auto stat : statVec ) - { - if( firstEv.getProc(sorter)[stat].size()!= secEv.getProc(sorter)[stat].size() ){ return false; } - if(!chaoticVecComp( firstEv.getProc(sorter)[stat], firstEv.getProcOrder(sorter)[stat], - secEv.getProc(sorter)[stat], secEv.getProcOrder(sorter)[stat] ) ){ return false; } - } - return true; - } - - bool evProcComp( event& firstEv, event& secEv, std::vector statVec, - statSort sorter ) - { - for( auto stat : statVec ) - { - if( 
firstEv.getProc(sorter)[stat].size()!= secEv.getProc(sorter)[stat].size() ){ return false; } - if(!chaoticVecComp( firstEv.getProc(sorter)[stat], firstEv.getProcOrder(sorter)[stat], - secEv.getProc(sorter)[stat], secEv.getProcOrder(sorter)[stat] ) ){ return false; } - } - return true; - } - - bool evProcComp( const event& firstEv, const event& secEv, std::vector statVec = {"-1", "1"} ) - { - for( auto stat : statVec ) - { - if( firstEv.getProc().at(stat).size()!= secEv.getProc().at(stat).size() ){ return false; } - if(!chaoticVecComp( firstEv.getProc().at(stat), firstEv.getProcOrder().at(stat), - secEv.getProc().at(stat), secEv.getProcOrder().at(stat) ) ){ return false; } - } - return true; - } - - bool evProcComp( const event& firstEv, const event& secEv, std::vector statVec, - sortFcn sorter ) - { - for( auto stat : statVec ) - { - if( firstEv.getProc().at(stat).size()!= secEv.getProc().at(stat).size() ){ return false; } - if(!chaoticVecComp( firstEv.getProc().at(stat), firstEv.getProcOrder().at(stat), - secEv.getProc().at(stat), secEv.getProcOrder().at(stat) ) ){ return false; } - } - return true; - } - - bool evProcComp( const event& firstEv, const event& secEv, std::vector statVec, - statSort sorter ) - { - for( auto stat : statVec ) - { - if( firstEv.getProc().at(stat).size()!= secEv.getProc().at(stat).size() ){ return false; } - if(!chaoticVecComp( firstEv.getProc().at(stat), firstEv.getProcOrder().at(stat), - secEv.getProc().at(stat), secEv.getProcOrder().at(stat) ) ){ return false; } - } - return true; - } - - bool eventComp::operator()( event& firstEv, event& secEv){ - if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getStatSort());} - else {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getSortFcn() );} - } - bool eventComp::operator()( const event& firstEv, const event& secEv) const { - if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getStatSort());} - else {return 
evProcComp( firstEv, secEv, {"-1", "1"}, firstEv.getSortFcn() );} - } - bool eventComp::operator()(event& firstEv, event& secEv, std::vector statVec){ - if( firstEv.isSpecSort() ) {return evProcComp( firstEv, secEv, statVec, firstEv.getStatSort());} - else {return evProcComp( firstEv, secEv, statVec, firstEv.getSortFcn() );} - } - - // ZW: fcn for checking whether a list of pdgXtract format - // processes sourceProcList contains a given process newProc - bool procVecContains( std::vector>>>& sourceProcList, - std::map>& newProc, const std::vector& statVec ) - {\ - for( auto proc : sourceProcList ) - { - if( sameProcString( *proc, newProc, statVec ) ){ return true; } - } - return false; - } - - // ZW: fcn for checking whether a vector of lheProc structs - // procList contains a given lheProc nuProc - bool procListComp( const std::vector>& procList, lheProc& nuProc, std::vector statVec ) - { - if( procList.size() != 0 ){ - for(auto proc : procList ) - { - if( procComp( *proc, nuProc, statVec ) ){ return true; } - } - } - return false; - } - - bool evProcListComp( std::vector>& procList, event& nuEv, std::vector statVec ) - { - if( procList.size()!= 0 ){ - for( auto ev : procList ) - { - if( evProcComp( *ev, nuEv, statVec ) ){ return true; } - } - } - return false; - } - - bool evProcListComp( std::vector>& procList, event& nuEv, std::vector statVec, - sortFcn sorter ) - { - if( procList.size()!= 0 ){ - for( auto ev : procList ) - { - if( evProcComp( *ev, nuEv, statVec, sorter ) ){ return true; } - } - } - return false; - } - - bool evProcListComp( std::vector>& procList, event& nuEv, std::vector statVec, - statSort sorter ) - { - if( procList.size()!= 0 ){ - for( auto ev : procList ) - { - if( evProcComp( *ev, nuEv, statVec, sorter ) ){ return true; } - } - } - return false; - } - - // ZW: fcn for extracting the different processes - // in a given REX format LHE file in the pdgXtract format - std::vector>>> procExtractor( lheNode& lheFile ) - { - std::vector>>> 
procList; - const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; - for( auto event : lheFile.getEvents() ) - { - auto currProc = pdgXtract( *event ); - if( procVecContains( procList, *currProc, statVec ) ){ continue; } - procList.push_back(currProc); - } - return procList; - } - - // ZW: fcn for extracting the different processes - // in a given REX format LHE file in the lheProc format - std::vector> processPull( lheNode& lheFile, - std::vector statVec = { "-1", "1" } ) - { - //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; - std::vector> procsList{}; - for( auto event : lheFile.getEvents() ) - { - auto currProc = std::make_shared( *event ); - if( procListComp( procsList, *currProc, statVec ) ){ continue; } - procsList.push_back( currProc ); - } - return procsList; - } - - std::vector> evProcessPull( lheNode& lheFile, std::vector statVec = { "-1", "1" } ) - { - //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; - std::vector> procsList{}; - for( auto currEv : lheFile.getEvents() ) - { - if( evProcListComp( procsList, *currEv, statVec ) ){ continue; } - procsList.push_back( currEv ); - } - return procsList; - } - - std::vector> evProcessPull( lheNode& lheFile, - sortFcn sorter, - std::vector statVec = { "-1", "1" }) - { - //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; - std::vector> procsList{}; - lheFile.setSameSort(sorter); - for( auto currEv : lheFile.getEvents() ) - { - if( evProcListComp( procsList, *currEv, statVec, sorter ) ){ continue; } - procsList.push_back( currEv ); - } - return procsList; - } - - std::vector> evProcessPull( lheNode& lheFile, - statSort sorter, - std::vector statVec = { "-1", "1" }) - { - //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; - std::vector> procsList{}; - lheFile.setStatSort(sorter); - for( auto currEv : lheFile.getEvents() ) - { - if( evProcListComp( procsList, *currEv, statVec, sorter ) ){ continue; } - 
procsList.push_back( currEv ); - } - return procsList; - } - - // ZW: fcn for keeping track of subprocess ordering - // in LHE file - size_t procPos( const std::vector>& evtSet, lheProc& currProc, - std::vector& statVec ) - { - for( size_t k = 0 ; k < evtSet.size() ; ++k ) - { - for( auto stat : statVec ) - { - if( evtSet[k]->valVecs[stat] != currProc.valVecs[stat] ){ break; } - } - return k; - } - return evtSet.size(); - } - - size_t evProcPos( const std::vector>& evtSet, event& currEv, - std::vector statVec = { "-1", "1" } ) - { - for( size_t k = 0 ; k < evtSet.size() ; ++k ) - { - if( evProcComp(*evtSet[k], currEv, statVec) ){ return k; } - } - return evtSet.size(); - } - - size_t evProcPos( const std::vector>& evtSet, event& currEv, - sortFcn sorter, std::vector statVec = {"-1", "1"} ) - { - for( size_t k = 0 ; k < evtSet.size() ; ++k ) - { - if( evProcComp(*evtSet[k], currEv, statVec, sorter) ){ return k; } - } - return evtSet.size(); - } - - size_t evProcPos( const std::vector>& evtSet, event& currEv, - statSort sorter, std::vector statVec = {"-1", "1"} ) - { - for( size_t k = 0 ; k < evtSet.size() ; ++k ) - { - if( evProcComp(*evtSet[k], currEv, statVec, sorter) ){ return k; } - } - return evtSet.size(); - } - - // ZW: fcn for extracting the subprocess ordering - // of LHE file - std::vector>> procOrder( lheNode& lheFile, const std::vector>& evtSet, - std::vector statVec = { "-1", "1" } ) - { - //const static std::vector statVec = { "-1", "1", "-2", "2", "3", "-9" }; - std::vector>> eventBools( evtSet.size(), std::make_shared> ( lheFile.getEvents().size() )); - //std::vector> pracBools( evtSet.size(), std::vector ( lheFile.getEvents().size() )); - for( auto boolSets : eventBools ){ - std::fill( boolSets->begin(), boolSets->end(), false ); - } - for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) - { - auto currProc = lheProc(*lheFile.getEvents()[k]); - eventBools[ procPos(evtSet, currProc, statVec) ]->at( k ) = true; - } - //for( size_t k = 0 ; k < 
eventBools.size() ; ++k ) - //{ - // eventBools[k] = std::make_shared>( pracBools[k] ); - //} - return eventBools; - } - - std::vector>> evProcOrder( lheNode& lheFile, const std::vector>& evtSet, - std::vector statVec = { "-1", "1" } ) - { - std::vector>> eventBools; - eventBools.reserve(evtSet.size()); - for (size_t i = 0; i < evtSet.size(); ++i) { - eventBools.push_back(std::make_shared>(lheFile.getEvents().size(), false)); - } - for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) - { - eventBools[ evProcPos(evtSet, *lheFile.getEvents()[k], statVec) ]->at( k ) = true; - } - return eventBools; - } - - std::vector>> evProcOrder( lheNode& lheFile, const std::vector>& evtSet, - sortFcn sorter, - std::vector statVec = { "-1", "1" } ) - { - std::vector>> eventBools; - eventBools.reserve(evtSet.size()); - for (size_t i = 0; i < evtSet.size(); ++i) { - eventBools.push_back(std::make_shared>(lheFile.getEvents().size(), false)); - } - for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) - { - eventBools[ evProcPos(evtSet, *lheFile.getEvents()[k], sorter, statVec) ]->at( k ) = true; - } - return eventBools; - } - - std::vector>> evProcOrder( lheNode& lheFile, const std::vector>& evtSet, - statSort sorter, - std::vector statVec = { "-1", "1" } ) - { - std::vector>> eventBools; - eventBools.reserve(evtSet.size()); - for (size_t i = 0; i < evtSet.size(); ++i) { - eventBools.push_back(std::make_shared>(lheFile.getEvents().size(), false)); - } - for( size_t k = 0 ; k < lheFile.getEvents().size() ; ++k ) - { - eventBools[ evProcPos(evtSet, *lheFile.getEvents()[k], sorter, statVec) ]->at( k ) = true; - } - return eventBools; - } - - // ZW: fcn for reordering LHE file based on subprocess - std::shared_ptr>> eventReOrder( lheNode& lheFile, std::vector relProc ) - { - auto reOrdered = std::make_shared>>(); - reOrdered->reserve( std::count( relProc.begin(), relProc.end(), true ) ); - for( size_t k = 0 ; k < relProc.size() ; ++k ) - { - if(!relProc[k]){continue;} - 
reOrdered->push_back( lheFile.getEvents()[k] ); - } - return reOrdered; - } - - // ZW: wrapper for eventReOrder - std::vector>>> lheReOrder( lheNode& lheFile, - std::vector statVec = { "-1", "1" } ) - { - auto procSets = processPull( lheFile, statVec ); - auto relProcs = procOrder( lheFile, procSets, statVec ); - std::vector>>> ordProcs(procSets.size()); - for( size_t k = 0 ; k < relProcs.size() ; ++k ) - { - ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); - } - return ordProcs; - } - - std::vector>>> lheEvReOrder( lheNode& lheFile, - std::vector statVec = { "-1", "1" } ) - { - auto procSets = evProcessPull( lheFile, statVec ); - auto relProcs = evProcOrder( lheFile, procSets, statVec ); - std::vector>>> ordProcs(procSets.size()); - for( size_t k = 0 ; k < relProcs.size() ; ++k ) - { - ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); - } - return ordProcs; - } - - std::vector>>> lheEvReOrder( lheNode& lheFile, - std::vector> procSets, std::vector>> relProcs, - std::vector statVec = { "-1", "1" } ) - { - //auto procSets = evProcessPull( lheFile, statVec ); - //auto relProcs = evProcOrder( lheFile, procSets, statVec ); - std::vector>>> ordProcs(procSets.size()); - for( size_t k = 0 ; k < relProcs.size() ; ++k ) - { - ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); - } - return ordProcs; - } - - std::vector>>> lheEvReOrder( lheNode& lheFile, - sortFcn sorter, - std::vector statVec = { "-1", "1" } ) - { - auto procSets = evProcessPull( lheFile, sorter, statVec ); - auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); - std::vector>>> ordProcs(procSets.size()); - for( size_t k = 0 ; k < relProcs.size() ; ++k ) - { - ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); - } - return ordProcs; - } - - std::vector>>> lheEvReOrder( lheNode& lheFile, - std::vector> procSets, std::vector>> relProcs, - sortFcn sorter, std::vector statVec = { "-1", "1" } ) - { - //auto procSets = evProcessPull( lheFile, sorter, statVec ); - //auto relProcs = 
evProcOrder( lheFile, procSets, sorter, statVec ); - std::vector>>> ordProcs(procSets.size()); - for( size_t k = 0 ; k < relProcs.size() ; ++k ) - { - ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); - } - return ordProcs; - } - - std::vector>>> lheEvReOrder( lheNode& lheFile, - statSort sorter, - std::vector statVec = { "-1", "1" } ) - { - auto procSets = evProcessPull( lheFile, sorter, statVec ); - auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); - std::vector>>> ordProcs(procSets.size()); - for( size_t k = 0 ; k < relProcs.size() ; ++k ) - { - ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); - } - return ordProcs; - } - - std::vector>>> lheEvReOrder( lheNode& lheFile, - std::vector> procSets, std::vector>> relProcs, - statSort sorter, std::vector statVec = { "-1", "1" } ) - { - //auto procSets = evProcessPull( lheFile, sorter, statVec ); - //auto relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); - std::vector>>> ordProcs(procSets.size()); - for( size_t k = 0 ; k < relProcs.size() ; ++k ) - { - ordProcs[k] = eventReOrder( lheFile, *relProcs[k] ); - } - return ordProcs; - } - - // ZW: transposed event information struct - evtInfo::evtInfo( const std::vector>& lheFile ){ - int nEvt = lheFile.size(); - wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); nprts.reserve(nEvt); procIDs.reserve(nEvt); - for( auto evt : lheFile ) - { - wgts.push_back(evt->getHead().getWeight()); - scales.push_back(evt->getHead().getScale()); - aQEDs.push_back(evt->getHead().getAQED()); - aQCDs.push_back(evt->getHead().getAQCD()); - nprts.push_back(evt->getHead().getNprt()); - procIDs.push_back(evt->getHead().getProcID()); - } - } - evtInfo::evtInfo( const std::vector>& lheFile, const std::vector& statVec ){ - int nEvt = lheFile.size(); - wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); - for( auto evt : lheFile ) - { - 
wgts.push_back(evt->getHead().getWeight()); - scales.push_back(evt->getHead().getScale()); - aQEDs.push_back(evt->getHead().getAQED()); - aQCDs.push_back(evt->getHead().getAQCD()); - size_t nPrt = 0; - for( auto stat : statVec ){ nPrt += evt->getProc()[stat].size(); } - relNPrts.push_back(nPrt); - procIDs.push_back(evt->getHead().getProcID()); - } - } - evtInfo::evtInfo( const std::vector>& lheFile, const std::vector& statVec, - sortFcn sorter ){ - int nEvt = lheFile.size(); - wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); - for( auto evt : lheFile ) - { - wgts.push_back(evt->getHead().getWeight()); - scales.push_back(evt->getHead().getScale()); - aQEDs.push_back(evt->getHead().getAQED()); - aQCDs.push_back(evt->getHead().getAQCD()); - size_t nPrt = 0; - for( auto stat : statVec ){ nPrt += evt->getProc(sorter)[stat].size(); } - relNPrts.push_back(nPrt); - procIDs.push_back(evt->getHead().getProcID()); - } - } - evtInfo::evtInfo( const std::vector>& lheFile, const std::vector& statVec, - statSort sorter ){ - int nEvt = lheFile.size(); - wgts.reserve(nEvt); scales.reserve(nEvt); aQEDs.reserve(nEvt); aQCDs.reserve(nEvt); relNPrts.reserve(nEvt); procIDs.reserve(nEvt); - for( auto evt : lheFile ) - { - wgts.push_back(evt->getHead().getWeight()); - scales.push_back(evt->getHead().getScale()); - aQEDs.push_back(evt->getHead().getAQED()); - aQCDs.push_back(evt->getHead().getAQCD()); - size_t nPrt = 0; - for( auto stat : statVec ){ nPrt += evt->getProc(sorter)[stat].size(); } - relNPrts.push_back(nPrt); - procIDs.push_back(evt->getHead().getProcID()); - } - } - - // ZW: transposed particle information struct - prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt ){ - int nEvt = lheFile.size(); - moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); - spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); 
mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); - for( auto evt : lheFile ) - { - for( auto prt : evt->getPrts() ) - { - moms.push_back( prt->getE() ); - masses.push_back( prt->getMass() ); - vtims.push_back( prt->getVTim() ); - spins.push_back( prt->getSpin() ); - statuses.push_back( prt->getStatus() ); - pdgs.push_back( prt->getPDG() ); - for( size_t k = 0 ; k < 2 ; ++k ) - { - moms.push_back( prt->getMom()[k] ); - mothers.push_back( prt->getMothers()[k] ); - icols.push_back( prt->getColor()[k] ); - } - moms.push_back( prt->getMom()[2] ); - } - } - } - prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ - int nEvt = lheFile.size(); - moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); - spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); - for( auto evt : lheFile ) - { - for( auto stat : statVec ) - { - for( auto i : evt->getProcOrder()[stat] ) - { - auto prt = evt->getPrts()[i]; - moms.push_back( prt->getE() ); - masses.push_back( prt->getMass() ); - vtims.push_back( prt->getVTim() ); - spins.push_back( prt->getSpin() ); - statuses.push_back( prt->getStatus() ); - pdgs.push_back( prt->getPDG() ); - for( size_t k = 0 ; k < 2 ; ++k ) - { - moms.push_back( prt->getMom()[k] ); - mothers.push_back( prt->getMothers()[k] ); - icols.push_back( prt->getColor()[k] ); - } - moms.push_back( prt->getMom()[2] ); - } - } - } - } - prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, - sortFcn sorter ){ - int nEvt = lheFile.size(); - moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); - spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); - for( auto evt : lheFile ) - { - for( auto stat : statVec ) - { - for( auto i : evt->getProcOrder(sorter)[stat] ) - { - auto prt = 
evt->getPrts()[i]; - moms.push_back( prt->getE() ); - masses.push_back( prt->getMass() ); - vtims.push_back( prt->getVTim() ); - spins.push_back( prt->getSpin() ); - statuses.push_back( prt->getStatus() ); - pdgs.push_back( prt->getPDG() ); - for( size_t k = 0 ; k < 2 ; ++k ) - { - moms.push_back( prt->getMom()[k] ); - mothers.push_back( prt->getMothers()[k] ); - icols.push_back( prt->getColor()[k] ); - } - moms.push_back( prt->getMom()[2] ); - } - } - } - } - prtInfo::prtInfo( const std::vector>& lheFile, const int nPrt, const std::vector& statVec, - statSort sorter ){ - int nEvt = lheFile.size(); - moms.reserve(4*nPrt*nEvt); vtims.reserve(nPrt*nEvt); masses.reserve(nPrt*nEvt); pdgs.reserve(nPrt*nEvt); - spins.reserve(nPrt*nEvt); statuses.reserve(nPrt*nEvt); mothers.reserve(2*nPrt*nEvt); icols.reserve(2*nPrt*nEvt); - for( auto evt : lheFile ) - { - for( auto stat : statVec ) - { - for( auto i : evt->getProcOrder(sorter)[stat] ) - { - auto prt = evt->getPrts()[i]; - moms.push_back( prt->getE() ); - masses.push_back( prt->getMass() ); - vtims.push_back( prt->getVTim() ); - spins.push_back( prt->getSpin() ); - statuses.push_back( prt->getStatus() ); - pdgs.push_back( prt->getPDG() ); - for( size_t k = 0 ; k < 2 ; ++k ) - { - moms.push_back( prt->getMom()[k] ); - mothers.push_back( prt->getMothers()[k] ); - icols.push_back( prt->getColor()[k] ); - } - moms.push_back( prt->getMom()[2] ); - } - } - } - } - - // ZW: transposed LHE file with a single process type - transMonoLHE::transMonoLHE( const std::vector>& lheFile , const int nPrt ){ - evtsHead = evtInfo(lheFile); - evtsData = prtInfo(lheFile, nPrt); - process = lheFile[0]; - } - transMonoLHE::transMonoLHE( const std::vector>& lheFile, const int nPrt, const std::vector& statVec ){ - evtsHead = evtInfo(lheFile, statVec); - evtsData = prtInfo(lheFile, nPrt, statVec); - process = lheFile[0]; - } - transMonoLHE::transMonoLHE( const std::vector>& lheFile, const int nPrt, - sortFcn sorter, - std::vector statVec ){ - 
evtsHead = evtInfo(lheFile, statVec); - evtsData = prtInfo(lheFile, nPrt, statVec, sorter); - process = lheFile[0]; - } - transMonoLHE::transMonoLHE( const std::vector>& lheFile, const int nPrt, - statSort sorter, - std::vector statVec){ - evtsHead = evtInfo(lheFile, statVec); - evtsData = prtInfo(lheFile, nPrt, statVec, sorter); - process = lheFile[0]; - } - - // ZW: transposed LHE file ordered by subprocess - transLHE::transLHE(){ return; } - transLHE::transLHE( lheNode& lheFile ) - { - procSets = evProcessPull( lheFile ); - relProcs = evProcOrder( lheFile, procSets ); - xmlFile = lheFile.getFile(); - auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs ); - subProcs = std::vector>( procsOrdered.size() ); - for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) - { - subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt() ); - } - } - transLHE::transLHE( lheNode& lheFile, - sortFcn sorter, - const std::vector& statVec ) - { - procSets = evProcessPull( lheFile, sorter, statVec ); - relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); - xmlFile = lheFile.getFile(); - auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, sorter, statVec ); - subProcs = std::vector>( procsOrdered.size() ); - for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) - { - subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), sorter, statVec ); - } - } - transLHE::transLHE( lheNode& lheFile, - statSort sorter, - const std::vector& statVec) - { - procSets = evProcessPull( lheFile, sorter, statVec ); - relProcs = evProcOrder( lheFile, procSets, sorter, statVec ); - xmlFile = lheFile.getFile(); - auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, sorter, statVec ); - subProcs = std::vector>( procsOrdered.size() ); - for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) - { - subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), sorter, statVec ); - } - } - 
transLHE::transLHE( lheNode& lheFile, const std::vector& statVec ) - { - procSets = evProcessPull( lheFile, statVec ); - relProcs = evProcOrder( lheFile, procSets, statVec ); - xmlFile = lheFile.getFile(); - auto procsOrdered = lheEvReOrder( lheFile, procSets, relProcs, statVec ); - subProcs = std::vector>( procsOrdered.size() ); - for( size_t k = 0 ; k < procsOrdered.size() ; ++k ) - { - subProcs[k] = std::make_shared( *procsOrdered[k], procsOrdered[k]->at(0)->getNprt(), statVec ); - } - } -// template - std::shared_ptr> transLHE::vectorFlat( std::vector>> vecVec ) - { - if( vecVec.size() != relProcs.size() ) throw std::range_error("vectorFlat: input vector size does not match number of subprocesses"); - for( size_t k = 0 ; k < vecVec.size() ; ++k){ - if( vecVec[k]->size() == relProcs[k]->size() ) continue; - else throw std::range_error("vectorFlat: input vector size does not match number of events for subprocess"); - } - auto flatVec = std::make_shared>(relProcs[0]->size()); - for( size_t k = 0 ; k < relProcs.size() ; ++k ){ - size_t currInd = 0; - for( size_t j = 0 ; j < relProcs[k]->size() ; ++j ){ - if( relProcs[k]->at(j) ){ - flatVec->at(currInd) = vecVec[k]->at(currInd); - ++currInd; - } - } - } - return flatVec; - } - - // ZW: vector transformation string_to_double - std::shared_ptr> vecStoD( const std::vector dataVec ) - { - auto valVec = std::make_shared>( dataVec.size() ); - std::transform( dataVec.begin(), dataVec.end(), valVec->begin(), []( const std::string_view& stv ){ - return std::stod(std::string(stv)); - } ); - return valVec; - } - - // ZW: vector transformation string_to_int - std::shared_ptr> vecStoI( const std::vector dataVec ) - { - auto valVec = std::make_shared>( dataVec.size() ); - std::transform( dataVec.begin(), dataVec.end(), valVec->begin(), []( const std::string_view& stv ){ - return std::stoi(std::string(stv)); - } ); - return valVec; - } - - // ZW: templated fcn for multiplying two vectors elementwise, - // assuming T has a 
multiplication operator* - template - std::shared_ptr> vecElemMult( const std::vector& vec1, const std::vector& vec2){ - if( vec1.size() < vec2.size() ){ return vecElemMult( vec2, vec1 ); } - auto valVec = std::make_shared>( vec1.size() ); - std::transform( vec1.begin(), vec1.end(), vec2.begin(), valVec->begin(), []( const T& v1, const T& v2 ){ - return v1 * v2; - } ); - return valVec; - } - - // ZW: bool struct to define which double values - // to extract transposed from LHE file - std::vector lheRetDs::getBools(){ - return { ebmup, xsecup, xerrup, xmaxup, xwgtup, scalup, aqedup, aqcdup, - pup, mass, vtimup, spinup }; - } - - // ZW: bool struct to define which int values - // to extract transposed from LHE file - std::vector lheRetInts::getBools(){ - return { idbmup, pdfgup, pdfsup, idwtup, nprup, lprup, - nup, idprup, idup, istup, mothup, icolup }; - } - - // ZW: function for extracting transposed double values - // from LHE file - std::shared_ptr>>> lheValDoubles( lheNode& lheFile, lheRetDs vals ) - { - // ZW: hard-setting returning g_S instead of a_S for now - bool aStogS = true; - auto boolVec = vals.getBools(); - const int noVals = std::count(boolVec.begin(), boolVec.end(), true); - auto lheAOS = transLHE( lheFile ); - auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); - std::vector>> &lheDs = *lheDos; - int currInd = 0; - if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } - if( boolVec[1] ){ - std::vector xsecVec( lheFile.getInit()->getLines().size() ); - for( auto line : lheFile.getInit()->getLines() ) - { - xsecVec.push_back(line->xsecup); - } - lheDs[currInd] = vecStoD( xsecVec ); - ++currInd; } - if( boolVec[2] ){ - std::vector xerrVec( lheFile.getInit()->getLines().size() ); - for( auto line : lheFile.getInit()->getLines() ) - { - xerrVec.push_back(line->xerrup); - } - lheDs[currInd] = vecStoD( xerrVec ); - ++currInd; } - if( boolVec[3] ){ - 
std::vector xmaxVec( lheFile.getInit()->getLines().size() ); - for( auto line : lheFile.getInit()->getLines() ) - { - xmaxVec.push_back(line->xmaxup); - } - lheDs[currInd] = vecStoD( xmaxVec ); - ++currInd; } - for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) - { - if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } - if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } - if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } - if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); - if( aStogS ){ - std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), - []( double alphaS ){ - auto gS = std::sqrt( 4. * M_PI * alphaS ); - return gS; - } ); - } - ++currInd; - } - if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } - if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } - if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } - if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } - } - - return lheDos; - } - - std::shared_ptr>>> lheValDoubles(transLHE& lheAOS, lheRetDs vals ) - { - // ZW: hard-setting returning g_S instead of a_S for now - bool aStogS = true; - auto boolVec = vals.getBools(); - const int noVals = std::count(boolVec.begin(), boolVec.end(), true); - //auto lheAOS = transLHE( lheFile ); - auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); - std::vector>> &lheDs = *lheDos; - int currInd = 0; - for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) - { - if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } - if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } - if( boolVec[6] ){ lheDs[currInd] = 
vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } - if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); - if( aStogS ){ - std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), - []( double alphaS ){ - auto gS = std::sqrt( 4. * M_PI * alphaS ); - return gS; - } ); - } - ++currInd; - } - if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } - if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } - if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } - if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } - } - return lheDos; - } - - std::shared_ptr>>> lheValDoubles( lheNode& lheFile, - const std::vector& statVec, lheRetDs vals = lheRetDs() ) - { - // ZW: hard-setting returning g_S instead of a_S for now - bool aStogS = true; - auto boolVec = vals.getBools(); - const int noVals = std::count(boolVec.begin(), boolVec.end(), true); - auto lheAOS = transLHE( lheFile, statVec ); - auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); - std::vector>> &lheDs = *lheDos; - int currInd = 0; - if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } - if( boolVec[1] ){ - std::vector xsecVec( lheFile.getInit()->getLines().size() ); - for( auto line : lheFile.getInit()->getLines() ) - { - xsecVec.push_back(line->xsecup); - } - lheDs[currInd] = vecStoD( xsecVec ); - ++currInd; } - if( boolVec[2] ){ - std::vector xerrVec( lheFile.getInit()->getLines().size() ); - for( auto line : lheFile.getInit()->getLines() ) - { - xerrVec.push_back(line->xerrup); - } - lheDs[currInd] = vecStoD( xerrVec ); - ++currInd; } - if( boolVec[3] ){ - std::vector xmaxVec( lheFile.getInit()->getLines().size() ); - for( auto line : lheFile.getInit()->getLines() ) - { - 
xmaxVec.push_back(line->xmaxup); - } - lheDs[currInd] = vecStoD( xmaxVec ); - ++currInd; } - for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) - { - if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } - if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } - if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } - if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); - if( aStogS ){ - std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), - []( double alphaS ){ - auto gS = std::sqrt( 4. * M_PI * alphaS ); - return gS; - } ); - } - ++currInd; - } - if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } - if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } - if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } - if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } - } - - return lheDos; - } - - std::shared_ptr>>> lheValDoubles( lheNode& lheFile, - sortFcn sorter, - const std::vector& statVec = {"-1", "1"}, lheRetDs vals = lheRetDs() ) - { - // ZW: hard-setting returning g_S instead of a_S for now - bool aStogS = true; - auto boolVec = vals.getBools(); - const int noVals = std::count(boolVec.begin(), boolVec.end(), true); - auto lheAOS = transLHE( lheFile, sorter, statVec ); - auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); - std::vector>> &lheDs = *lheDos; - int currInd = 0; - if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } - if( boolVec[1] ){ - std::vector xsecVec( lheFile.getInit()->getLines().size() ); - for( auto line : lheFile.getInit()->getLines() ) - { - xsecVec.push_back(line->xsecup); - } - 
lheDs[currInd] = vecStoD( xsecVec ); - ++currInd; } - if( boolVec[2] ){ - std::vector xerrVec( lheFile.getInit()->getLines().size() ); - for( auto line : lheFile.getInit()->getLines() ) - { - xerrVec.push_back(line->xerrup); - } - lheDs[currInd] = vecStoD( xerrVec ); - ++currInd; } - if( boolVec[3] ){ - std::vector xmaxVec( lheFile.getInit()->getLines().size() ); - for( auto line : lheFile.getInit()->getLines() ) - { - xmaxVec.push_back(line->xmaxup); - } - lheDs[currInd] = vecStoD( xmaxVec ); - ++currInd; } - for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) - { - if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } - if( boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } - if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } - if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); - if( aStogS ){ - std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), - []( double alphaS ){ - auto gS = std::sqrt( 4. 
* M_PI * alphaS ); - return gS; - } ); - } - ++currInd; - } - if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } - if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } - if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } - if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } - } - - return lheDos; - } - - std::shared_ptr>>> lheValDoubles( lheNode& lheFile, - statSort sorter, - const std::vector& statVec = {"-1", "1"}, lheRetDs vals = lheRetDs() ) - { - // ZW: hard-setting returning g_S instead of a_S for now - bool aStogS = true; - auto boolVec = vals.getBools(); - const int noVals = std::count(boolVec.begin(), boolVec.end(), true); - auto lheAOS = transLHE( lheFile, sorter, statVec ); - auto lheDos = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); - std::vector>> &lheDs = *lheDos; - int currInd = 0; - if( boolVec[0] ){ lheDs[currInd] = vecStoD( { lheFile.getInit()->getHead()->ebmup[0], lheFile.getInit()->getHead()->ebmup[1] } ); ++currInd; } - if( boolVec[1] ){ - std::vector xsecVec( lheFile.getInit()->getLines().size() ); - for( auto line : lheFile.getInit()->getLines() ) - { - xsecVec.push_back(line->xsecup); - } - lheDs[currInd] = vecStoD( xsecVec ); - ++currInd; } - if( boolVec[2] ){ - std::vector xerrVec( lheFile.getInit()->getLines().size() ); - for( auto line : lheFile.getInit()->getLines() ) - { - xerrVec.push_back(line->xerrup); - } - lheDs[currInd] = vecStoD( xerrVec ); - ++currInd; } - if( boolVec[3] ){ - std::vector xmaxVec( lheFile.getInit()->getLines().size() ); - for( auto line : lheFile.getInit()->getLines() ) - { - xmaxVec.push_back(line->xmaxup); - } - lheDs[currInd] = vecStoD( xmaxVec ); - ++currInd; } - for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) - { - if( boolVec[4] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.wgts ); ++currInd; } - if( 
boolVec[5] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.scales ); ++currInd; } - if( boolVec[6] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQEDs ); ++currInd; } - if( boolVec[7] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsHead.aQCDs ); - if( aStogS ){ - std::transform( lheDs[currInd]->begin(), lheDs[currInd]->end(), lheDs[currInd]->begin(), - []( double alphaS ){ - auto gS = std::sqrt( 4. * M_PI * alphaS ); - return gS; - } ); - } - ++currInd; - } - if( boolVec[8] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.moms ); ++currInd; } - if( boolVec[9] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.masses ); ++currInd; } - if( boolVec[10] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.vtims ); ++currInd; } - if( boolVec[11] ){ lheDs[currInd] = vecStoD( lheAOS.subProcs[k]->evtsData.spins ); ++currInd; } - } - - return lheDos; - } - - // ZW: function for extracting transposed int values - // from LHE file - std::shared_ptr>>> lheValInts( lheNode& lheFile, lheRetInts vals = lheRetInts() ) - { - auto boolVec = vals.getBools(); - const int noVals = std::count(boolVec.begin(), boolVec.end(), true); - auto lheAOS = transLHE( lheFile ); - auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); - std::vector>> &lheDs = *lheIs; - int currInd = 0; - if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } - if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } - if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } - if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } - if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } - if( boolVec[5] ){ 
- std::vector lprVec( lheFile.getInit()->getLines().size() ); - for( auto line : lheFile.getInit()->getLines() ) - { - lprVec.push_back(line->lprup); - } - lheDs[currInd] = vecStoI( lprVec ); - ++currInd; } - for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) - { - if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } - if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } - if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } - if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } - if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } - if( boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } - } - return lheIs; - } - - std::shared_ptr>>> lheValInts( lheNode& lheFile, std::vector statVec, - lheRetInts vals = lheRetInts() ) - { - auto boolVec = vals.getBools(); - const int noVals = std::count(boolVec.begin(), boolVec.end(), true); - auto lheAOS = transLHE( lheFile, statVec ); - auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); - std::vector>> &lheDs = *lheIs; - int currInd = 0; - if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } - if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } - if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } - if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } - if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } - if( boolVec[5] ){ - std::vector lprVec( lheFile.getInit()->getLines().size() ); - 
for( auto line : lheFile.getInit()->getLines() ) - { - lprVec.push_back(line->lprup); - } - lheDs[currInd] = vecStoI( lprVec ); - ++currInd; } - for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) - { - if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } - if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } - if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } - if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } - if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } - if( boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } - } - return lheIs; - } - - std::shared_ptr>>> lheValInts( lheNode& lheFile, - sortFcn sorter, - std::vector statVec = {"-1", "1"}, lheRetInts vals = lheRetInts() ) - { - auto boolVec = vals.getBools(); - const int noVals = std::count(boolVec.begin(), boolVec.end(), true); - auto lheAOS = transLHE( lheFile, sorter, statVec ); - auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); - std::vector>> &lheDs = *lheIs; - int currInd = 0; - if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } - if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } - if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } - if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } - if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } - if( boolVec[5] ){ - std::vector lprVec( lheFile.getInit()->getLines().size() ); - for( auto line : 
lheFile.getInit()->getLines() ) - { - lprVec.push_back(line->lprup); - } - lheDs[currInd] = vecStoI( lprVec ); - ++currInd; } - for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) - { - if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } - if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } - if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } - if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } - if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } - if( boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } - } - return lheIs; - } - - std::shared_ptr>>> lheValInts( lheNode& lheFile, - statSort sorter, - std::vector statVec = {"-1", "1"}, lheRetInts vals = lheRetInts() ) - { - auto boolVec = vals.getBools(); - const int noVals = std::count(boolVec.begin(), boolVec.end(), true); - auto lheAOS = transLHE( lheFile, sorter, statVec ); - auto lheIs = std::make_shared>>>(noVals * lheAOS.subProcs.size() ); - std::vector>> &lheDs = *lheIs; - int currInd = 0; - if( boolVec[0] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idbmup[0], lheFile.getInit()->getHead()->idbmup[1] } ); ++currInd; } - if( boolVec[1] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfgup[0], lheFile.getInit()->getHead()->pdfgup[1] } ); ++currInd; } - if( boolVec[2] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->pdfsup[0], lheFile.getInit()->getHead()->pdfsup[1] } ); ++currInd; } - if( boolVec[3] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->idwtup } ); ++currInd; } - if( boolVec[4] ){ lheDs[currInd] = vecStoI( { lheFile.getInit()->getHead()->nprup } ); ++currInd; } - if( boolVec[5] ){ - std::vector lprVec( lheFile.getInit()->getLines().size() ); - for( auto line : 
lheFile.getInit()->getLines() ) - { - lprVec.push_back(line->lprup); - } - lheDs[currInd] = vecStoI( lprVec ); - ++currInd; } - for( size_t k = 0 ; k < lheAOS.subProcs.size() ; ++k ) - { - if( boolVec[6] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.nprts ); ++currInd; } - if( boolVec[7] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsHead.procIDs ); ++currInd; } - if( boolVec[8] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.pdgs ); ++currInd; } - if( boolVec[9] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.statuses ); ++currInd; } - if( boolVec[10] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.mothers ); ++currInd; } - if( boolVec[11] ){ lheDs[currInd] = vecStoI( lheAOS.subProcs[k]->evtsData.icols ); ++currInd; } - } - return lheIs; - } -} - -#endif diff --git a/tools/REX/pepper.cu b/tools/REX/pepper.cu deleted file mode 100644 index b49c20fb16..0000000000 --- a/tools/REX/pepper.cu +++ /dev/null @@ -1,169 +0,0 @@ -#include "PEPPER.hpp" -#include "fbridge.cc" -#include -#include - -struct fbridgeRunner{ - std::vector rndHel; - std::vector rndCol; - std::vector selHel; - std::vector selCol; - CppObjectInFortran *fBridge; - const unsigned int chanId = 0; - const unsigned int nMom = 4; - unsigned int nEvt; - unsigned int nPar; - fbrideRunner(){} - fbridgeRunner( PEP::lheNode& lheFile ){ - if( !lheFile.isParsed() ){ lheFile.deepParse(); } - nEvt = lheFile.events.size(); - rndHel = std::vector( nEvt, 0. ); - rndCol = std::vector( nEvt, 0. ); - selHel = std::vector( nEvt, 0 ); - selCol = std::vector( nEvt, 0 ); - nPar = lheFile.events[0]->getPrts().size(); - } - fbridgeRunner( std::shared_ptr lheFile ){ - if(!lheFile->isParsed() ){ lheFile->deepParse(); } - nEvt = lheFile->events.size(); - rndHel = std::vector( nEvt, 0. ); - rndCol = std::vector( nEvt, 0. 
); - selHel = std::vector( nEvt, 0 ); - selCol = std::vector( nEvt, 0 ); - nPar = lheFile->events[0]->getPrts().size(); - } - std::shared_ptr> scatAmp( std::shared_ptr> momenta, std::shared_ptr> alphaS ){ - std::shared_ptr> evalScatAmps( nEvt ); - fbridgecreate_( &fBridge, &nEvt, &nPar, &nMom ); - fbridgesequence_( &fBridge, &momenta->at(0), &alphaS->at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); - fbridgedelete_( &fBridge ); - return evalScatAmps; - } - std::shared_ptr> scatAmp( std::shared_ptr> momenta, std::shared_ptr> alphaS ){ - if( typeid(FORTRANFPTYPE(0)) == typeid(float(0)) ){ - std::shared_ptr> nuMom( nEvt ); - std::shared_ptr> nuAlphaS( nEvt ); - std::transform( momenta->begin(), momenta->end(), nuMom->begin(), [](double mom){ return static_cast(mom); }) - std::transform( alphaS->begin(), alphaS->end(), nuAlphaS->begin(), [](double gs){ return static_cast(gs); }); - return scatAmp( nuMom, nuAlphaS ); - } - std::shared_ptr> evalScatAmps( nEvt ); - fbridgecreate_( &fBridge, &nEvt, &nPar, &nMom ); - fbridgesequence_( &fBridge, &momenta->at(0), &alphaS->at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); - fbridgedelete_( &fBridge ); - return evalScatAmps; - } - std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ - std::shared_ptr> evalScatAmps( nEvt ); - fbridgecreate_( &fBridge, &nEvt, &nPar, &nMom ); - fbridgesequence_( &fBridge, &momenta[0], &alphaS[0], &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); - fbridgedelete_( &fBridge ); - return evalScatAmps; - } - std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ - if( typeid(FORTRANFPTYPE(0)) == typeid(float(0)) ){ - auto nuMom = std::vector( nEvt ); - auto nuAlphaS = std::vector( nEvt ); - std::transform( momenta.begin(), momenta.end(), nuMom.begin(), [](double mom){ return static_cast(mom); }) - std::transform( alphaS.begin(), alphaS.end(), nuAlphaS.begin(), 
[](double gs){ return static_cast(gs); }); - return scatAmp( nuMom, nuAlphaS ); - } - auto evalScatAmps = std::shared_ptr>( nEvt ); - fbridgecreate_( &fBridge, &nEvt, &nPar, &nMom ); - fbridgesequence_( &fBridge, &momenta[0], &alphaS[0], &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); - fbridgedelete_( &fBridge ); - return evalScatAmps; - } -}; - -std::shared_ptr> meEval( std::vector& x, std::vector& y){ - int random = rand() % 10; - if( random == 0 ){ random = 11; } - auto thisIsIt = std::make_shared>( y.size(), random ); - return thisIsIt; -} - -int usage( char* argv0, int ret = 1 ) -{ - std::cout << "Usage: " << argv0 - << " [--lhefile=\"/YOUR/PATH/HERE\"|-lhe=\"/YOUR/PATH/HERE\"] [--rwgtcard=/YOUR/PATH/HERE|-rwgt=\"/YOUR/PATH/HERE\"]\n" - << "[--output=/YOUR/PATH/HERE\"|-out=\"/YOUR/PATH/HERE\"]\n"; - std::cout << "\n"; - std::cout << "The LHE file path should be with respect to the directory you are running\n"; - std::cout << "this program from, and similarly the rwgt_card should be as well.\n"; - if( typeid(FORTRANFPTYPE(0)) == typeid(double(0)) ){ - std::cout << "The program is currently compiled with double precision.\n"; - } else if( typeid(FORTRANFPTYPE(0)) == typeid(float(0)) ){ - std::cout << "The program is currently compiled with float precision.\n"; - } else{ std::cout << "The program is currently compiled with an unrecognised precision -- FPTYPE is neither float nor double.\n"; } - std::cout << "Numerical precision can only be redefined at compile time.\nIf you wish to change the precision, please recompile with the option \"FPTYPE=f\"/\"FPTYPE=d\"."; - return ret; -} - - -int main( int argc, char** argv ){ - std::string lheFilePath; - std::string rwgtCardPath; - std::string outputPath; - std::string slhaPath; - - // READ COMMAND LINE ARGUMENTS - for( auto arg : argv ) - { - auto currArg = std::string( arg ); - if( currArg.substr(0,9) == "--lhefile" || currArg.substr(0,4) == "-lhe" ) - { - lheFilePath = 
currArg.substr( currArg.find( "=" ) + 1 ); - } - else if( currArg.substr(0,10) == "--rwgtcard" || currArg.substr(0,5) == "-rwgt" ) - { - rwgtCardPath = currArg.substr( currArg.find( "=" ) + 1 ); - } else if( currArg.substr(0,8) == "--output" || currArg.substr(0,4) == "-out" ){ - outputPath = currArg.substr( currArg.find( "=" ) + 1 ); - } else - { - return usage( argv[0] ); - } - } - - if( lheFilePath.empty() || rwgtCardPath.empty() ){ - return usage( argv[0] ); - } - - std::string currPath = argv[0]; - - size_t slashPos = currPath.find_last_of( "/" ); - bool onWindows = false; - if( slashPos == std::string::npos ){ slashPos = currpath.find_last_of( "\\" ); onWindows = true; } - if( slashPos == std::string::npos ) - throw std::runtime_error( "Failed to determine current working directory -- need to know where program is run from to identify where to pull and push param_card.dat." ); - - if( onWindows ){ - if( currPath.substr( currPath.find_last_of("\\", slashPos - 1) + 1, 2 ) == "P1" ){ - slhaPath = "..\\..\\Cards\\param_card.dat"; - } else{ - slhaPath = "\\Cards\\param_card.dat"; - } - } else { - if( currPath.substr( currPath.find_last_of("/", slashPos - 1) + 1, 2 ) == "P1" ){ - slhaPath = "../../Cards/param_card.dat"; - } else { - slhaPath = "/Cards/param_card.dat"; - } - } - - - PEP::PER::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); - fileCol.initCards(); - - auto bridgeCont = fbridgeRunner( fileCol.getLhe() ); - - std::function>( std::vector&, std::vector& )> scatteringAmplitude = bridgeCont.scatAmp; - PEP::PER::rwgtRunner nuRun( fileCol, scatteringAmplitude ); - - - nuRun.runRwgt( outputPath ); - - return 0; - -} \ No newline at end of file diff --git a/tools/REX/rwgt_runner_copy.cc b/tools/REX/rwgt_runner_copy.cc deleted file mode 100644 index 5e89f8c3d3..0000000000 --- a/tools/REX/rwgt_runner_copy.cc +++ /dev/null @@ -1,197 +0,0 @@ -//========================================================================== -// Copyright (C) 2023-2024 CERN -// 
Licensed under the GNU Lesser General Public License (version 3 or later). -// Written by: Z. Wettersten (Jan 2024) for the MG5aMC CUDACPP plugin. -//========================================================================== -//========================================================================== -// This file has been automatically generated for the CUDACPP plugin by -%(info_lines)s -//========================================================================== -//========================================================================== -// A class for reweighting matrix elements for -%(process_lines)s -//-------------------------------------------------------------------------- - -#include "rwgt_instance.h" -#include "fbridge.cc" - -// ZW: SET UP NAMESPACE -namespace %(process_namespace)s{ -//namespace dummy{ - - std::shared_ptr> amp( int& nEvt, int& nPar, int& nMom, std::vector& momenta, std::vector& alphaS, std::vector& rndHel, std::vector& rndCol, std::vector& selHel, std::vector& selCol, int& chanId ){ - CppObjectInFortran *bridgeInst; - auto evalScatAmps = std::make_shared>( nEvt ); - fbridgecreate_( &bridgeInst, &nEvt, &nPar, &nMom ); - fbridgesequence_( &bridgeInst, &momenta.at(0), &alphaS.at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); - fbridgedelete_( &bridgeInst ); - return evalScatAmps; - } - - rwgt::fBridge& bridgeConstr( std::vector& process, unsigned int warpSize = 32 ){ - rwgt::fBridge constrBridge = rwgt::fBridge( process, warpSize ); - rwgt::bridgeWrapper amplitude = amp; - constrBridge.setBridge( amplitude ); - return constrBridge; - } - - std::shared_ptr> procSort( std::string_view status, std::vector arguments ){ - std::vector> initPrts = {%(init_prt_ids)s}; - std::vector> finPrts = {%(fin_prt_ids)s}; -// std::vector initPrts = {"-1"}; -// std::vector finPrts = {"1"}; - std::shared_ptr> refOrder; - if( status == "-1" ){ - for( auto& prts : initPrts ){ - refOrder = REX::getRefOrder( prts, arguments 
); - if( refOrder->at(refOrder->size() - 1) != REX::npos ){ break; } - } - return refOrder; - } - else if( status == "1" ){ - for( auto& prts : finPrts ){ - refOrder = REX::getRefOrder( prts, arguments ); - if( refOrder->at(refOrder->size() - 1) != REX::npos ){ break; } - } - return refOrder; - } - return REX::stoiSort( arguments ); - } - - bool checkProc( REX::event& process, std::vector& relStats ){ - REX::statSort locSort = procSort; - auto order = process.getProcOrder( locSort ); - for( auto stat : relStats ){ - auto currPts = order.at( stat ); - if( currPts[currPts.size() - 1 ] == REX::npos ){ return false; } - } - return true; - } - - REX::eventSet& eventSetConstr( std::vector& process ){ - REX::eventSet constrSet = REX::eventSet( process ); - REX::eventSetComp compar = checkProc; - constrSet.setComp( compar ); - return constrSet; - } - - REX::eventSet& getEventSet(){ - std::vector>> eventVec = {%(process_events)s}; - std::vector process; - for( auto& ev : eventVec ){ - process.push_back( REX::event( ev ) ); - } - return eventSetConstr( process ); - } - - struct fbridgeRunner{ - std::vector rndHel; - std::vector rndCol; - std::vector selHel; - std::vector selCol; - CppObjectInFortran *fBridge; - const unsigned int chanId = 0; - const int nMom = 4; - int nWarpRemain; - int nEvt; - int fauxNEvt; - int nPar; - bool setup = false; - fbridgeRunner(){} - fbridgeRunner( REX::event& process ){ - nPar = process.getPrts().size(); - } - void runnerSetup( unsigned int& noEvts, unsigned int warpSize = 32){ - if( setup ){ return; } - nEvt = noEvts; - nWarpRemain = rwgt::warpRemain( nEvt, warpSize ); - fauxNEvt = nEvt + nWarpRemain; - rndHel = std::vector( fauxNEvt, 0. ); - rndCol = std::vector( fauxNEvt, 0. 
); - selHel = std::vector( fauxNEvt, 0 ); - selCol = std::vector( fauxNEvt, 0 ); - setup = true; - } - void runnerSetup( std::vector& evVec, unsigned int warpSize = 32){ - if( setup ){ return; } - nEvt = evVec.size(); - nWarpRemain = rwgt::warpRemain( nEvt, warpSize ); - fauxNEvt = nEvt + nWarpRemain; - rndHel = std::vector( fauxNEvt, 0. ); - rndCol = std::vector( fauxNEvt, 0. ); - selHel = std::vector( fauxNEvt, 0 ); - selCol = std::vector( fauxNEvt, 0 ); - setup = true; - } - void runnerSetup( std::shared_ptr> evVec, unsigned int warpSize = 32){ - if( setup ){ return; } - runnerSetup( *evVec, warpSize ); - } - std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ - runnerSetup( alphaS ); - for( auto j = 0 ; j < nWarpRemain ; ++j ){ - alphaS.push_back( 0. ); - for( auto k = 0 ; k < nMom * nPar ; ++k ){ - momenta.push_back( 0. ); - } - } - auto evalScatAmps = std::make_shared>( fauxNEvt ); - fbridgecreate_( &fBridge, &fauxNEvt, &nPar, &nMom ); - fbridgesequence_( &fBridge, &momenta.at(0), &alphaS.at(0), &rndHel[0], &rndCol[0], &chanId, &evalScatAmps->at(0), &selHel[0], &selCol[0] ); - fbridgedelete_( &fBridge ); - alphaS.resize( nEvt ); - momenta.resize( nEvt * nPar * nMom ); - evalScatAmps->resize( nEvt ); - return evalScatAmps; - } - std::shared_ptr> scatAmp( std::shared_ptr> momenta, std::shared_ptr> alphaS ){ - return scatAmp( *momenta, *alphaS ); - } -#if defined MGONGPU_FPTYPE_FLOAT - std::shared_ptr> scatAmp( std::vector& momenta, std::vector& alphaS ){ - auto nuMom = std::vector( nEvt ); - auto nuAlphaS = std::vector( nEvt ); - std::transform( momenta.begin(), momenta.end(), nuMom.begin(), [](double mom){ return static_cast(mom); }) - std::transform( alphaS.begin(), alphaS.end(), nuAlphaS.begin(), [](double gs){ return static_cast(gs); }); - return scatAmp( nuMom, nuAlphaS ); - } -#endif - }; - - std::shared_ptr> thisProcSort( std::string_view& status, std::vector& arguments ){ - std::vector initPrts = %(init_prt_ids)s; - std::vector 
finPrts = %(fin_prt_ids)s; -// std::vector initPrts = {"-1"}; -// std::vector finPrts = {"1"}; - if( status == "-1" ){ - return REX::getRefOrder( initPrts, arguments ); - } - else if( status == "1" ){ - return REX::getRefOrder( finPrts, arguments ); - } - return REX::stoiSort( arguments ); - } - -// ZW: SET UP INPUT LHE BLOCK -// ZW: SET UP REX::event FROM LHE BLOCK -// auto procEvent = REX::event( procEvent ); -// REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; - - std::vector> eventVec = {%(process_event)s}; - REX::event locEv = REX::event( eventVec ); - fbridgeRunner fBridge = fbridgeRunner( locEv ); - - REX::teaw::amplitude scatteringAmp = []( std::vector& momenta, std::vector& alphaS ){ - return fBridge.scatAmp( momenta, alphaS ); - }; - - REX::statSort currProcSort = []( std::string_view stat, std::vector vec ){ return thisProcSort( stat, vec ); }; - - auto runner = rwgt::instance(eventVec, scatteringAmp); - auto thisProc = runner.process.getProc( currProcSort ); -// ZW: SET UP WRAPPER FOR FORTRAN_BRIDGE - -// ZW: SET UP EVALUATION OF MATRIX ELEMENTS FUNCTION - - -} \ No newline at end of file diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc index 147b09de98..de24f00721 100644 --- a/tools/REX/teawREX.cc +++ b/tools/REX/teawREX.cc @@ -52,7 +52,7 @@ namespace REX::teaw rwgtVal::rwgtVal( std::string_view paramLine ) : paramVal( paramLine, false ){if( paramLine.size() == 0 ){ return; } realLine = paramLine; - auto vals = *REX::nuBlankSplitter( realLine ); + auto vals = *REX::blankSplitter( realLine ); blockName = vals[1]; idStr = vals[2]; valStr = vals[3]; @@ -119,13 +119,13 @@ namespace REX::teaw void rwgtProc::parse(){ std::vector blocks; std::vector>> params; - auto procLines = *REX::nuLineSplitter( procString ); + auto procLines = *REX::lineSplitter( procString ); for( auto line : procLines ) { if( line.find_first_not_of(" \n\r\f\t\v") == '#' ){ continue; } auto strtPt = line.find("set"); 
if( strtPt == REX::npos ){ continue; } - auto words = *REX::nuWordSplitter( line.substr(strtPt) ); + auto words = *REX::blankSplitter( line.substr(strtPt) ); auto currBlock = words[1]; auto loc = std::find_if( blocks.begin(), blocks.end(), [&]( std::string_view block ){ return (block == currBlock); } ); @@ -167,7 +167,7 @@ namespace REX::teaw std::string_view rwgtProc::comRunProc(){ return procString; } void rwgtCard::parse( bool parseOnline ){ - auto allLaunchPos = REX::nuFindEach( this->srcCard, "launch" ); + auto allLaunchPos = REX::findEach( this->srcCard, "launch" ); std::vector lnchPos; lnchPos.reserve( allLaunchPos->size() ); for( auto pos : *allLaunchPos ) @@ -176,7 +176,7 @@ namespace REX::teaw if( srcCard.find_last_of("#", pos) < srcCard.find_last_of("\n", pos) ){ lnchPos.push_back(pos); } } lnchPos.push_back( REX::npos ); - auto preamble = REX::nuLineSplitter( srcCard.substr( 0, lnchPos[0] - 1 ) ); + auto preamble = REX::lineSplitter( srcCard.substr( 0, lnchPos[0] - 1 ) ); for( auto line : *preamble ) { if( line[line.find_first_not_of(" \n\r\f\t\v")] == '#' ){ continue; } @@ -668,7 +668,12 @@ namespace REX::teaw omegaSqr += invWgt * invWgt; } double var = (omegaSqr - omega * omega * invN) * invN * xSecCurr * xSecCurr; - errXSecs->push_back( std::sqrt( sqrtInvN * var )*xSec + xSecCurr * omega * invN * xErr ); + double error = std::sqrt( std::max( 0., sqrtInvN * var) )*xSec + xSecCurr * omega * invN * xErr; + if( std::isnan( error ) || std::isinf( error ) ){ + std::cout << "\033[1;33mWarning:Error propagation yielded NaN for " << rwgtSets->rwgtNames->at(k) << ". 
Approximating the error at cross section level.\033[0m\n"; + error = xErr * std::max( xSec / xSecCurr, xSecCurr / xSec ); + } + errXSecs->push_back( error ); } return true; } @@ -697,9 +702,6 @@ namespace REX::teaw return nullptr; } - void rwgtRun( rwgtRunner& rwgt, const std::string& path ){ - rwgt.runRwgt( path ); - } } #endif diff --git a/tools/REX/teawREX.hpp b/tools/REX/teawREX.hpp deleted file mode 100644 index 55740d4aac..0000000000 --- a/tools/REX/teawREX.hpp +++ /dev/null @@ -1,554 +0,0 @@ -/*** - * _ ______ _______ __ - * | | | ___ \ ___\ \ / / - * | |_ ___ __ ___ _| |_/ / |__ \ V / - * | __/ _ \/ _` \ \ /\ / / /| __| / \ - * | || __/ (_| |\ V V /| |\ \| |___/ /^\ \ - * \__\___|\__,_| \_/\_/ \_| \_\____/\/ \/ - * - ***/ - -// THIS IS NOT A LICENSED RELEASE -// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD -// FROM AN IMPROPER RELEASE. - -// Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. -// All rights reserved. - -#ifndef _TEAWREX_CC_ -#define _TEAWREX_CC_ - -#include -#include -#include -#include -#include -#include -#include -#include "REX.cc" -#include "teawREX.h" - -namespace REX::teaw -{ - - template - std::shared_ptr> scatAmpEval(std::vector& momenta, std::function>(std::vector&)> evalFunc) - { return evalFunc(momenta); } - - template - std::shared_ptr> scatAmpEval(std::vector& momenta, std::function(std::vector&)> evalFunc) - { return evalFunc(momenta); } - - template - std::shared_ptr> scatAmpEval(std::vector& momenta, std::function>(std::vector&, std::vector&)> evalFunc) - { return evalFunc(momenta); } - - template - std::shared_ptr> scatAmpEval(std::vector& momenta, std::function(std::vector&, std::vector&)> evalFunc) - { return evalFunc(momenta); } - - rwgtVal::rwgtVal() : paramVal(){ return; } - rwgtVal::rwgtVal( std::string_view paramLine ) - : paramVal( paramLine, false ){if( paramLine.size() == 0 ){ return; } - realLine = paramLine; - auto vals = *REX::nuBlankSplitter( realLine ); - blockName = vals[1]; - idStr = vals[2]; - valStr = 
vals[3]; - } - std::string_view rwgtVal::getLine(){ return realLine; } - bool rwgtVal::isAll(){ return (idStr == "all"); } - void rwgtVal::outWrite( REX::paramBlock& srcBlock ){ - if ( isAll() ) - { - for( auto param : srcBlock.params ) - { - param.valStr = valStr; - param.modded = true; - } - return; - } - auto currPar = std::find_if( srcBlock.params.begin(), srcBlock.params.end(), - [&]( const REX::paramVal& parPar ){ return (parPar.idStr == idStr ); } ); - if( currPar == srcBlock.params.end() ){ - srcBlock.params.push_back( REX::paramVal( realLine.substr(realLine.find("set") + 4) ) ); - srcBlock.params[ srcBlock.params.size() - 1 ].modded = true; - srcBlock.modded = true; - return; - } - currPar->valStr = valStr; - currPar->modded = true; - srcBlock.modded = true; - return; - } - - rwgtBlock::rwgtBlock( std::vector values, std::string_view title) - { - name = title; - rwgtVals.resize( values.size() ); - for( size_t k = 0 ; k < values.size() ; ++k ) - { - rwgtVals[k] = rwgtVal( values[k] ); - } - } - rwgtBlock::rwgtBlock( const std::vector& vals, std::string_view title ) - { - name = title; - rwgtVals = vals; - } - std::string_view rwgtBlock::getBlock(){ - if( written ){ return runBlock; } - runBlock = ""; - for( auto val : rwgtVals ){ - runBlock += std::string(val.getLine()) + "\n"; - } - written = true; - return runBlock; - } - void rwgtBlock::outWrite( REX::paramBlock& srcBlock, const std::map& blocks ) - { - for( auto parm : rwgtVals ) - { - parm.outWrite( srcBlock ); - } - srcBlock.modded = true; - return; - } - - void rwgtProc::parse(){ - std::vector blocks; - std::vector>> params; - auto procLines = *REX::nuLineSplitter( procString ); - for( auto line : procLines ) - { - auto strtPt = line.find("set"); - auto words = *REX::nuWordSplitter( line.substr(strtPt) ); - auto currBlock = words[1]; - auto loc = std::find_if( blocks.begin(), blocks.end(), - [&]( std::string_view block ){ return (block == currBlock); } ); - if( loc == blocks.end() ){ - 
blocks.push_back( currBlock ); - params.push_back( std::make_shared>( std::vector({rwgtVal( line )} ) )); } - else { - params[ std::distance( blocks.begin(), loc ) - 1 ]->push_back( rwgtVal( line ) ); - } - } - rwgtParams.reserve(blocks.size()); - for( size_t k = 0 ; k < blocks.size() ; ++k ) - { - rwgtParams.push_back( rwgtBlock( *params[k], blocks[k] ) ); - } - } - rwgtProc::rwgtProc( REX::lesHouchesCard slhaSet, std::string_view rwgtSet, bool parseOnline ) - { - if( rwgtSet == "" ){ return; } - auto strtLi = rwgtSet.find( "\n", rwgtSet.find("launch") ) + 1; - auto endLi = rwgtSet.find("\n", strtLi); - while( rwgtSet[rwgtSet.find_first_not_of("\n ", endLi)] == 's' ) - { endLi = rwgtSet.find( "\n", endLi + 1 ); } - procString = rwgtSet.substr( strtLi, endLi - strtLi ); - if( parseOnline ){ parse(); } - } - std::shared_ptr rwgtProc::outWrite( const REX::lesHouchesCard& paramOrig ){ - auto slhaOrig = std::make_shared( paramOrig ); - std::map blockIds; - for( size_t k = 0 ; k < slhaOrig->blocks.size() ; ++k ) - { slhaOrig->blocks[k].parse( true ); - auto nyama = std::pair( slhaOrig->blocks[k].name, k); - blockIds.insert( nyama ); } - for( auto rwgts : rwgtParams ) - { rwgts.outWrite( slhaOrig->blocks[ blockIds.at( rwgts.name ) ], blockIds ); } - slhaOrig->modded = true; - return slhaOrig; - } - std::string_view rwgtProc::comRunProc(){ return procString; } - - void rwgtCard::parse( bool parseOnline ) { - auto strt = srcCard.find("launch"); - auto commPos = srcCard.find_last_of("#", strt); - while( commPos > srcCard.find_last_of("\n", strt) ){ - if( commPos == REX::npos ){ - break; - } - strt = srcCard.find("launch", strt + 6 ); - } - while( auto chPos = srcCard.find( "set" ) < strt ){ - if( srcCard.find_last_of("#", chPos) > srcCard.find_last_of("\n", chPos) ){ chPos = srcCard.find("change", strt + 6 ); continue; } - opts.push_back( srcCard.substr( chPos, srcCard.find("\n", chPos) - chPos ) ); - } - std::vector lnchPos({strt}); - auto nuLnch = srcCard.find( "launch", 
strt + 6 ); - while ( nuLnch != std::string_view::npos ) - { - if( srcCard.find_last_of("#", nuLnch) < srcCard.find_last_of("\n", nuLnch) ){ lnchPos.push_back(nuLnch); } - nuLnch = srcCard.find( "launch", nuLnch + 6 ); - } - for( size_t k = 0 ; k < lnchPos.size() - 1 ; ++k ) - { - auto strtLi = srcCard.find( "set", lnchPos[k] ); - rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( strtLi, lnchPos[k+1] - strtLi ), parseOnline ) ); - if( srcCard.find( "--", lnchPos[k] ) < strtLi ){ - auto strtPos = srcCard.find( "--", lnchPos[k] ); - while( (strtPos < strtLi ) && (strtPos!= std::string_view::npos) ){ - auto nuStrtPos = std::min( srcCard.find( "\n", strtPos ), srcCard.find( "--", strtPos + 1 )); - rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.push_back( srcCard.substr( strtPos, nuStrtPos - strtPos ) ); - if( rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr(2,11) == "rwgt_name"){ - rwgtRuns[ rwgtRuns.size() - 1 ].rwgtName = rwgtRuns[ rwgtRuns.size() - 1 ]. 
- rwgtOpts[ rwgtRuns[ rwgtRuns.size() - 1 ].rwgtOpts.size() - 1 ].substr( 11, nuStrtPos - strtPos - 11 ); - } - if( nuStrtPos == srcCard.find( "\n", strtPos ) ){ break; } - strtPos = nuStrtPos; - } - } - } - size_t endLi = srcCard.find( "\n", lnchPos[ lnchPos.size() - 1 ] ); - if( srcCard.substr( endLi + 1, 3 ) == "set" ){ - while( srcCard.substr( endLi + 1, 3 ) == "set" ) - { - endLi = srcCard.find( "\n", endLi + 1 ); - } - rwgtRuns.push_back( rwgtProc( slhaCard, srcCard.substr( lnchPos[lnchPos.size()-1], endLi - lnchPos[lnchPos.size()-1] ), parseOnline ) ); - } - rwgtProcs = std::vector(); rwgtProcs.reserve( rwgtRuns.size() ); - rwgtNames.reserve( rwgtRuns.size() ); - int p = 1; - for( auto run : rwgtRuns ){ - rwgtProcs.push_back( run.comRunProc() ); - if( run.rwgtName == "" ){ - rwgtNames.push_back( "rwgt_" + std::to_string( p++ ) ); - } else { - rwgtNames.push_back( std::string(run.rwgtName) ); - } - } - } - rwgtCard::rwgtCard( std::string_view reweight_card ){ - srcCard = reweight_card; - } - rwgtCard::rwgtCard( std::string_view reweight_card, REX::lesHouchesCard slhaParams, bool parseOnline ){ - srcCard = reweight_card; - slhaCard = slhaParams; - if( parseOnline ){ parse( parseOnline ); } - } - std::vector> rwgtCard::writeCards( REX::lesHouchesCard& slhaOrig ){ - std::vector> cardVec; - slhaOrig.parse(); - cardVec.reserve( rwgtRuns.size() ); - for( auto rwgt : rwgtRuns ) - { - cardVec.push_back( rwgt.outWrite( slhaOrig ) ); - } - return cardVec; - } - - void rwgtCollection::setRwgt( std::shared_ptr rwgts ){ - if( rwgtSet ){ return; } - rwgtSets = rwgts; - rwgtSet = true; - } - void rwgtCollection::setRwgt( rwgtCard rwgts ){ - if( rwgtSet ){ return; } - setRwgt( std::make_shared( rwgts ) ); rwgtSet = true; - } - void rwgtCollection::setSlha( std::shared_ptr slha ){ - if( slhaSet ){ return; } - slhaParameters = slha; - slhaParameters->parse(); - slhaSet = true; - } - void rwgtCollection::setSlha( REX::lesHouchesCard slha ){ - if( slhaSet ){ return; } - setSlha( 
std::make_shared( slha ) ); - slhaSet = true; - } - void rwgtCollection::setLhe( std::shared_ptr lhe ){ - if( lheFileSet ){ return; } - lheFile = lhe; - lheFileSet = true; - } - void rwgtCollection::setLhe( REX::lheNode& lhe ){ - if( lheFileSet ){ return; } - setLhe( std::make_shared( lhe ) ); - lheFileSet = true; - } - void rwgtCollection::setLhe( std::string_view lhe_file ){std::cout << "line 272\n"; - if( lheFileSet ){ return; } std::cout << "line 273\n"; - //lheFile = REX::lheParser( lhe_file, strt, post ); - lheFile = std::make_shared( REX::lheNode(lhe_file) ); std::cout << "line 275\n"; - lheFileSet = true; std::cout << "line 276\n"; - } - std::shared_ptr rwgtCollection::getRwgt(){ return rwgtSets; } - std::shared_ptr rwgtCollection::getSlha(){ return slhaParameters; } - std::shared_ptr rwgtCollection::getLhe(){ return lheFile; } - rwgtCollection::rwgtCollection(){ return; } - rwgtCollection::rwgtCollection( std::shared_ptr lhe, std::shared_ptr slha, std::shared_ptr rwgts ){ - setLhe( lhe ); - setSlha( slha ); - setRwgt( rwgts ); - } - template - void rwgtCollection::setDoubles(Args&&... args){ - if( lheFile == nullptr || rwgtSets == nullptr || slhaParameters == nullptr ) - throw std::runtime_error( "One or more of the necessary files (SLHA parameter card, LHE event storage file, and MadGraph-format reweight card) have not been initialised." ); - REX::lheRetDs returnBools; returnBools.xwgtup = true; returnBools.aqcdup = true; returnBools.pup = true; - eventFile = REX::transLHE( *lheFile, args... ); - auto vecOfVecs = REX::lheValDoubles( eventFile, returnBools ); - if( vecOfVecs->size() != 3 * eventFile.subProcs.size() ) - throw std::runtime_error( "Incorrect number of parameters have been extracted from the LHE file." 
); - //wgts[0] = vecOfVecs->at( 0 ); gS[0] = vecOfVecs->at( 1 ); momenta[0] = vecOfVecs->at( 2 ); - for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) - { - wgts.push_back( vecOfVecs->at( 3*k ) ); - gS.push_back( vecOfVecs->at( 3*k + 1 ) ); - momenta.push_back( vecOfVecs->at( 3*k + 2 ) ); - } - } - - void rwgtFiles::setRwgtPath( std::string_view path ){ rwgtPath = path; } - void rwgtFiles::setSlhaPath( std::string_view path ){ slhaPath = path; } - void rwgtFiles::setLhePath( std::string_view path ){ lhePath = path; } - rwgtFiles::rwgtFiles() : rwgtCollection(){ return; } - rwgtFiles::rwgtFiles( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card ) : rwgtCollection(){ - setRwgtPath( reweight_card ); - setSlhaPath( slha_card ); - setLhePath( lhe_card ); - } - template - void rwgtFiles::initCards(Args&&... args){ - if( rwgtPath == "" || slhaPath == "" || lhePath == "" ) - throw std::runtime_error( "Paths to reweight card, parameter card, or LHE file have not been set" ); - pullRwgt(); pullSlha(); pullLhe(); - setLhe( *lheCard ); - setSlha( std::make_shared( *slhaCard ) ); - setRwgt( std::make_shared( *rewgtCard, *slhaParameters, true ) ); - setDoubles(args...); - } - template - void rwgtFiles::initCards( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, Args&&... 
args ){ - setLhePath( lhe_card ); - setSlhaPath( slha_card ); - setRwgtPath( reweight_card ); - initCards(args...); - } - void rwgtFiles::pullRwgt(){ - rewgtCard = REX::filePuller( rwgtPath ); - } - void rwgtFiles::pullSlha(){ - slhaCard = REX::filePuller( slhaPath ); - } - void rwgtFiles::pullLhe(){ - lheCard = REX::filePuller( lhePath ); - std::cout << *lheCard << "\n"; - } - - void rwgtRunner::setMeEval( amplitude eval ){ - meEval = eval; meInit = true; - ampCall nuEvals; - nuEvals.insert( std::pair( *eventFile.subProcs[0]->process, eval ) ); - meEvals = nuEvals; - } - void rwgtRunner::setMeEvals( ampCall evals ){ meEvals = evals; meCompInit = true; } - void rwgtRunner::addMeEval( const REX::event& ev, const amplitude& eval ){ meEvals.insert( std::pair( ev, eval ) ); meCompInit = true; } - rwgtRunner::rwgtRunner() : rwgtFiles(){ return; } - rwgtRunner::rwgtRunner( rwgtFiles& rwgts ) : rwgtFiles( rwgts ){ return; } - rwgtRunner::rwgtRunner( rwgtFiles& rwgts, amplitude meCalc ) : rwgtFiles( rwgts ){ - meEval = meCalc; - meInit = true; - } - rwgtRunner::rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ) : rwgtFiles( rwgts ){ - meEvals = meCalcs; - meCompInit = true; - } - rwgtRunner::rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, - amplitude meCalc ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ - meEval = meCalc; - meInit = true; - } - rwgtRunner::rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, - ampCall meCalcs ) : rwgtFiles( lhe_card, slha_card, reweight_card ){ - meEvals = meCalcs; - meCompInit = true; - } - bool rwgtRunner::oneME(){ return (meInit != meCompInit); } - bool rwgtRunner::singAmp(){ return (meInit && !meCompInit); } - template - void rwgtRunner::setMEs(Args&&... args){ - initCards(args...); - if( !oneME() ) - throw std::runtime_error( "No or multiple function(s) for evaluating scattering amplitudes has been provided." 
); - //ZW FIX THIS - initMEs = {}; - for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) - { - auto ins = meEvals[*(eventFile.subProcs[k]->process)]( *(momenta[k]), *(gS[k]) ); - initMEs.push_back( std::make_shared>( ins->begin(), ins->begin() + wgts[k]->size() ) ); - } - //auto ins = meEval( *(momenta[0]), *(gS[0]) ); - //initMEs = {std::make_shared>( ins->begin(), ins->begin() + wgts[0]->size() )}; - meSet = true; - } - bool rwgtRunner::setParamCard( std::shared_ptr slhaParams ){ - if( slhaPath == "" ) - throw std::runtime_error( "No parameter card path has been provided." ); - if( slhaParameters == nullptr ) - throw std::runtime_error( "No SLHA parameter card has been provided." ); - if( !REX::filePusher( slhaPath, *slhaParams->selfWrite() ) ) - throw std::runtime_error( "Failed to overwrite parameter card." ); - return true; - } - void rwgtRunner::setNormWgtsSingleME(){ - //if( initMEs->size() != wgts[0]->size() ) - // throw std::runtime_error( "Inconsistent number of events and event weights." ); - meNormWgts = {std::make_shared>( wgts[0]->size() )}; - for( size_t k = 0; k < initMEs[0]->size(); k++ ){ - meNormWgts[0]->at( k ) = wgts[0]->at( k ) / initMEs[0]->at( k ); - } - normWgt = meNormWgts[0]; - } - void rwgtRunner::setNormWgtsMultiME(){ - meNormWgts = std::vector>>( initMEs.size() ); - for( size_t k = 0 ; k < wgts.size() ; ++k ){ - meNormWgts[k] = std::make_shared>( wgts[k]->size() ); - for( size_t i = 0 ; i < wgts[k]->size() ; ++i ){ - meNormWgts[k]->at( i ) = wgts[k]->at( i ) / initMEs[k]->at( i ); - } - } - normWgt = eventFile.vectorFlat( meNormWgts ); - } - template - void rwgtRunner::setNormWgts(Args&&... args){ - if( !oneME() ){ setMEs(args...); } - //if( initMEs->size() != wgts[0]->size() ) - // throw std::runtime_error( "Inconsistent number of events and event weights." 
); - for( size_t k = 0; k < initMEs.size() ; ++k ){ - if( initMEs[k]->size() != wgts[k]->size() ) - throw std::runtime_error( "Inconsistent number of events and event weights." ); - } - if( initMEs.size() == 1 ){ setNormWgtsSingleME(); } - else { setNormWgtsMultiME(); } - normWgtSet = true; - } - bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheIn, size_t currId ){ - if( !normWgtSet ) - throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); - if( !setParamCard( slhaParams ) ) - throw std::runtime_error( "Failed to rewrite parameter card." ); - std::shared_ptr> newWGTs; - if( singAmp() ){ - auto newMEs = meEval( *momenta[0], *gS[0] ); - newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); - } - else{ - std::vector>> nuMEs = {}; - for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) - { - nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); - } - std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); - newWGTs = REX::vecElemMult( *newMEs, *normWgt ); - } - //ZW IF MULTIPLE TYPES - REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); - lheIn->addWgt( 0, nuWgt ); - return true; - } - bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheIn, size_t currId, std::string& id ){ - if( !normWgtSet ) - throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); - if( !setParamCard( slhaParams ) ) - throw std::runtime_error( "Failed to rewrite parameter card." 
); - std::shared_ptr> newWGTs; - if( singAmp() ){ - auto newMEs = meEval( *momenta[0], *gS[0] ); - newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); - } - else{ - std::vector>> nuMEs = {}; - for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) - { - nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); - } - std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); - newWGTs = REX::vecElemMult( *newMEs, *normWgt ); - } - //ZW IF MULTIPLE TYPES - REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); - lheIn->addWgt( 0, nuWgt ); - return true; - } - bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheIn, size_t currId, REX::event& ev ){ - if( !normWgtSet ) - throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." ); - if( !setParamCard( slhaParams ) ) - throw std::runtime_error( "Failed to rewrite parameter card." ); - //auto newMEs = meEval( *momenta, *gS ); - std::shared_ptr> newWGTs; - if( singAmp() ){ - auto newMEs = meEval( *momenta[0], *gS[0] ); - newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); - } - else{ - std::vector>> nuMEs = {}; - for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) - { - nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); - } - std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); - newWGTs = REX::vecElemMult( *newMEs, *normWgt ); - } - //ZW IF MULTIPLE TYPES - REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs ); - lheIn->addWgt( 0, nuWgt ); - return true; - } - bool rwgtRunner::singleRwgtIter( std::shared_ptr slhaParams, std::shared_ptr lheIn, size_t currId, - std::string& id, REX::event& ev ){ - if( !normWgtSet ) - throw std::runtime_error( "Normalised original weights (wgt/|ME|) not evaluated -- new weights cannot be calculated." 
); - if( !setParamCard( slhaParams ) ) - throw std::runtime_error( "Failed to rewrite parameter card." ); - std::shared_ptr> newWGTs; - if( singAmp() ){ - auto newMEs = meEval( *momenta[0], *gS[0] ); - newWGTs = REX::vecElemMult( *newMEs, *meNormWgts[0] ); - } - else{ - std::vector>> nuMEs = {}; - for( size_t k = 0 ; k < eventFile.subProcs.size() ; ++k ) - { - nuMEs.push_back(meEvals[*eventFile.subProcs[k]->process]( *(momenta[k]), *(gS[k]) )); - } - std::shared_ptr> newMEs = eventFile.vectorFlat( nuMEs ); - newWGTs = REX::vecElemMult( *newMEs, *normWgt ); - } - //ZW IF MULTIPLE TYPES - REX::newWgt nuWgt( rwgtSets->rwgtRuns[currId].comRunProc(), newWGTs, id ); - lheIn->addWgt( 0, nuWgt ); - return true; - } - bool rwgtRunner::lheFileWriter( std::shared_ptr lheIn, std::string outputDir ){ - bool writeSuccess = REX::filePusher( outputDir, *lheIn->nodeWriter() ); - if( !writeSuccess ) - throw std::runtime_error( "Failed to write LHE file." ); - return true; - } - void rwgtRunner::runRwgt( const std::string& output ){ - setMEs(); - setNormWgts(); - rwgtGroup = std::make_shared(); - auto currInd = lheFile->getHeader()->addWgtGroup( rwgtGroup ); - auto paramSets = rwgtSets->writeCards( *slhaParameters ); - for( size_t k = 0 ; k < paramSets.size(); k++ ){ - singleRwgtIter( paramSets[k], lheFile, k, rwgtSets->rwgtNames[k] ); - std::cout << "."; - } - lheFileWriter( lheFile, output ); - REX::filePusher( slhaPath, *slhaCard ); - std::cout << "\nReweighting done.\n"; - } - - void rwgtRun( rwgtRunner& rwgt, const std::string& path ){ - rwgt.runRwgt( path ); - } -} - -#endif diff --git a/tools/REX/tester.cpp b/tools/REX/tester.cpp deleted file mode 100644 index 9a795d1a7e..0000000000 --- a/tools/REX/tester.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include "teawREX.hpp" -#include -#include - -std::shared_ptr> meEval( std::vector& x, std::vector& y){ - int random = rand() % 10; - if( random == 0 ){ random = 11; } - auto thisIsIt = std::make_shared>( y.size(), random ); - return 
thisIsIt; -} - -std::shared_ptr> sortFunc(std::vector arguments){ - return REX::stoiSort(arguments); -} - -std::shared_ptr> sorterFunc(std::string_view dummy, std::vector arguments){ - return REX::stoiSort(arguments); -} - -int main( int argc, char* argv[] ){ - std::string lheFilePath; - std::string rwgtCardPath; - std::string outputPath; - std::string slhaPath; - - // READ COMMAND LINE ARGUMENTS -for( int i = 1; i < argc; i++ ) - { - auto currArg = std::string( argv[i] ); - if( currArg.substr(0,9) == "--lhefile" || currArg.substr(0,4) == "-lhe" ) - { - lheFilePath = currArg.substr( currArg.find( "=" ) + 1 ); - } - else if( currArg.substr(0,10) == "--rwgtcard" || currArg.substr(0,5) == "-rwgt" ) - { - rwgtCardPath = currArg.substr( currArg.find( "=" ) + 1 ); - } else if( currArg.substr(0,8) == "--output" || currArg.substr(0,4) == "-out" ){ - outputPath = currArg.substr( currArg.find( "=" ) + 1 ); - } else if (currArg.substr(0,12) == "--param_card" || currArg.substr(0,5) == "-slha" ){ - slhaPath = currArg.substr( currArg.find( "=" ) + 1 ); - } - } - - - std::string currPath = argv[0]; - auto sembler = std::function>(std::vector)>(sortFunc); - auto sembler2 = std::function>(std::string_view, std::vector)>(sorterFunc); - auto lheFile = REX::filePuller(lheFilePath); - //std::cout << lheFile->substr(0, 1) << "\n"; - //std::cout << bool(lheFile->compare(0, 1, "<")) << "\n"; - //std::cout << lheFile->substr(1968, 1999 - 1968) << "\n"; - auto parseLhe = REX::lheNode(*lheFile); - //std::cout << *parseLhe.nodeWriter() << "\n"; - auto treeMan = parseLhe.getTree(); - //std::cout << parseLhe.getChildren().size() << " & " << parseLhe.getEvents().size() << " & " << treeMan.getChildren()->size() << "\n"; - auto proceses = REX::lheReOrder(parseLhe, {"-1", "1", "2"} ); - auto processes2 = REX::lheEvReOrder(parseLhe, {"-1", "1", "2"} ); - //std::cout << proceses.size() << " & " << processes2.size() << "\n"; - bool comp = REX::evProcComp( *parseLhe.getEvents()[0], 
*parseLhe.getEvents()[1], {"-1", "1"} ); - if( comp ){ std::cout << "true\n"; } - else{ std::cout << "false\n"; } - auto evlist = REX::evProcessPull( parseLhe, {"-1", "1"} ); - //auto evsVals = lheValDoubles(parseLhe); - auto evsVals = lheValDoubles(parseLhe, sembler2); - int siz = 0; - for( auto& ev : *evsVals ){ - siz += ev->size(); - } - std::cout << evsVals->size() << "\n"; - std::cout << siz << "\n"; - - REX::teaw::rwgtFiles fileCol( lheFilePath, slhaPath, rwgtCardPath ); - fileCol.initCards(); - - return 0; - -} \ No newline at end of file From 4c01292b94a6574a0945c17c9f48a947df26a533 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 16 Sep 2024 18:51:19 +0200 Subject: [PATCH 31/76] [param] regenerate gg_tt.mad for reference (in the usual directory and configuration) --- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 54 +++++++++---------- .../gg_tt.mad/Cards/me5_configuration.txt | 4 +- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 451b75637f..b93576c52f 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005722761154174805  +DEBUG: model prefixing takes 0.0058133602142333984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -164,10 +164,10 @@ It has been validated for the last time with version: 3.5.2 INFO: initialize a new directory: CODEGEN_mad_gg_tt INFO: remove old information in CODEGEN_mad_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color 
information for process: g g > t t~ @1 @@ -183,7 +183,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.073 s +Wrote files for 10 helas calls in 0.074 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 @@ -192,29 +192,29 @@ ALOHA: aloha creates 2 routines in 0.149 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.135 s +ALOHA: aloha creates 4 routines in 0.136 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -222,15 +222,15 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #2 succeeded at 227 (offset 13 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README Run "open index.html" to see more information about this process. quit -real 0m1.917s -user 0m1.636s -sys 0m0.269s +real 0m2.013s +user 0m1.676s +sys 0m0.261s Code generation completed in 2 seconds ************************************************************ * * @@ -252,9 +252,9 @@ Code generation completed in 2 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -282,9 +282,9 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt index 4f5079f78a..68b4c46295 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo From 9a50131edb7c6d1ef42d30b86a59275c3e7aa9bf Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 16 Sep 2024 19:05:11 +0200 Subject: [PATCH 32/76] [param] in CODEGEN/generateAndCompare.sh move the changes to Cards/ident_card.dat _before_ `treatcards param` to prevent param_card.inc changes during rebuild This is related to https://github.com/oliviermattelaer/mg5amc_test/issues/2 --- epochX/cudacpp/CODEGEN/generateAndCompare.sh | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh index f73bcd93ef..29542287a6 100755 --- a/epochX/cudacpp/CODEGEN/generateAndCompare.sh +++ b/epochX/cudacpp/CODEGEN/generateAndCompare.sh @@ -305,6 +305,13 @@ function codeGenAndDiff() echo "*** ERROR! 
Code generation failed" exit 1 fi + # Add a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/2 (THIS IS ONLY NEEDED IN THE MADGRAPH4GPU GIT REPO) + # (NEW SEP2024: move this _before_ `madevent treatcards param` as otherwise param_card.inc changes during the build of the code) + if [ "${OUTBCK}" == "madnovec" ] || [ "${OUTBCK}" == "madonly" ] || [ "${OUTBCK}" == "mad" ] || [ "${OUTBCK}" == "madcpp" ] || [ "${OUTBCK}" == "madgpu" ]; then + cat ${outproc}/Cards/ident_card.dat | head -3 > ${outproc}/Cards/ident_card.dat.new + cat ${outproc}/Cards/ident_card.dat | tail -n+4 | sort >> ${outproc}/Cards/ident_card.dat.new + \mv ${outproc}/Cards/ident_card.dat.new ${outproc}/Cards/ident_card.dat + fi # Patches moved here from patchMad.sh after Olivier's PR #764 (THIS IS ONLY NEEDED IN THE MADGRAPH4GPU GIT REPO) if [ "${OUTBCK}" == "mad" ]; then # Force the use of strategy SDE=1 in multichannel mode (see #419) @@ -419,12 +426,6 @@ function codeGenAndDiff() ### dir_patches=PROD ### $SCRDIR/patchMad.sh ${OUTDIR}/${proc}.${autosuffix} ${vecsize} ${dir_patches} ${PATCHLEVEL} ###fi - # Add a workaround for https://github.com/oliviermattelaer/mg5amc_test/issues/2 (these are ONLY NEEDED IN THE MADGRAPH4GPU GIT REPO) - if [ "${OUTBCK}" == "madnovec" ] || [ "${OUTBCK}" == "madonly" ] || [ "${OUTBCK}" == "mad" ] || [ "${OUTBCK}" == "madcpp" ] || [ "${OUTBCK}" == "madgpu" ]; then - cat ${OUTDIR}/${proc}.${autosuffix}/Cards/ident_card.dat | head -3 > ${OUTDIR}/${proc}.${autosuffix}/Cards/ident_card.dat.new - cat ${OUTDIR}/${proc}.${autosuffix}/Cards/ident_card.dat | tail -n+4 | sort >> ${OUTDIR}/${proc}.${autosuffix}/Cards/ident_card.dat.new - \mv ${OUTDIR}/${proc}.${autosuffix}/Cards/ident_card.dat.new ${OUTDIR}/${proc}.${autosuffix}/Cards/ident_card.dat - fi # Additional patches that are ONLY NEEDED IN THE MADGRAPH4GPU GIT REPO cat << EOF > ${OUTDIR}/${proc}.${autosuffix}/.gitignore crossx.html From ea796d0bd18533b50559e181293a5b641a585c15 Mon Sep 17 00:00:00 2001 
From: Andrea Valassi Date: Mon, 16 Sep 2024 19:09:44 +0200 Subject: [PATCH 33/76] [param] regenerate gg_tt.mad after anticipating the changes to ident_card.dat in CODEGEN/generateAndCompare.sh: as expected, the order in param_card.inc has changed This is related to https://github.com/oliviermattelaer/mg5amc_test/issues/2 --- epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 10 +++++----- epochX/cudacpp/gg_tt.mad/Source/param_card.inc | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index b93576c52f..24e5bbd249 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058133602142333984  +DEBUG: model prefixing takes 0.005617618560791016  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -192,7 +192,7 @@ ALOHA: aloha creates 2 routines in 0.149 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.136 s +ALOHA: aloha creates 4 routines in 0.137 s VVV1 FFV1 FFV1 @@ -228,9 +228,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.013s -user 0m1.676s -sys 0m0.261s +real 0m2.049s +user 0m1.655s +sys 0m0.268s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.mad/Source/param_card.inc b/epochX/cudacpp/gg_tt.mad/Source/param_card.inc index 081365c16b..1fcfce55bb 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/param_card.inc +++ b/epochX/cudacpp/gg_tt.mad/Source/param_card.inc @@ -1,15 +1,15 @@ - MDL_MB = 4.700000D+00 - MDL_MT = 1.730000D+02 + MDL_WZ = 2.441404D+00 + MDL_WW = 2.047600D+00 + MDL_WH = 6.382339D-03 + MDL_WT = 1.491500D+00 MDL_MTA = 1.777000D+00 MDL_MZ = 9.118800D+01 MDL_MH = 1.250000D+02 + MDL_MB = 4.700000D+00 + MDL_MT = 1.730000D+02 AEWM1 = 1.325070D+02 MDL_GF = 1.166390D-05 AS = 1.180000D-01 + MDL_YMTAU = 1.777000D+00 MDL_YMB = 4.700000D+00 MDL_YMT = 1.730000D+02 - MDL_YMTAU = 1.777000D+00 - MDL_WT = 1.491500D+00 - MDL_WZ = 2.441404D+00 - MDL_WW = 2.047600D+00 - MDL_WH = 6.382339D-03 From 2aeb8a3722b1b6521f3fc71bfc58bb64235d2c4e Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 16 Sep 2024 19:19:56 +0200 Subject: [PATCH 34/76] [param] ** COMPLETE PARAM ** regenerate all processes: param_card.inc changes in (all?) 
.mad directories, as expected --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 56 ++++---- .../ee_mumu.mad/Cards/me5_configuration.txt | 4 +- .../cudacpp/ee_mumu.mad/Source/param_card.inc | 14 +- .../CODEGEN_cudacpp_ee_mumu_log.txt | 35 +++-- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 10 +- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 36 +++--- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 58 ++++----- .../gg_tt01g.mad/Cards/me5_configuration.txt | 4 +- .../gg_tt01g.mad/Source/param_card.inc | 14 +- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 60 ++++----- .../gg_ttg.mad/Cards/me5_configuration.txt | 4 +- .../cudacpp/gg_ttg.mad/Source/param_card.inc | 14 +- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 32 ++--- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 60 ++++----- .../gg_ttgg.mad/Cards/me5_configuration.txt | 4 +- .../cudacpp/gg_ttgg.mad/Source/param_card.inc | 14 +- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 38 +++--- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 62 ++++----- .../gg_ttggg.mad/Cards/me5_configuration.txt | 4 +- .../gg_ttggg.mad/Source/param_card.inc | 14 +- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 38 +++--- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 60 ++++----- .../gq_ttq.mad/Cards/me5_configuration.txt | 4 +- .../cudacpp/gq_ttq.mad/Source/param_card.inc | 14 +- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 42 +++--- .../CODEGEN_mad_heft_gg_bb_log.txt | 54 ++++---- .../Cards/me5_configuration.txt | 4 +- .../heft_gg_bb.mad/Source/param_card.inc | 16 +-- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 36 +++--- .../CODEGEN_mad_nobm_pp_ttW_log.txt | 70 +++++----- .../Cards/me5_configuration.txt | 4 +- .../nobm_pp_ttW.mad/Source/param_card.inc | 12 +- .../CODEGEN_mad_pp_tt012j_log.txt | 96 +++++++------- .../pp_tt012j.mad/Cards/me5_configuration.txt | 4 +- .../pp_tt012j.mad/Source/param_card.inc | 14 +- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 64 +++++----- .../Cards/me5_configuration.txt | 4 +- .../smeft_gg_tttt.mad/Source/param_card.inc | 120 
+++++++++--------- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 40 +++--- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 60 ++++----- .../Cards/me5_configuration.txt | 4 +- .../susy_gg_t1t1.mad/Source/param_card.inc | 90 ++++++------- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 30 ++--- .../CODEGEN_mad_susy_gg_tt_log.txt | 56 ++++---- .../Cards/me5_configuration.txt | 4 +- .../susy_gg_tt.mad/Source/param_card.inc | 90 ++++++------- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 32 ++--- 47 files changed, 799 insertions(+), 800 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index dd8c14a54f..ed08e994bb 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -57,7 +57,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005581378936767578  +DEBUG: model prefixing takes 0.005808353424072266  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.004 s +1 processes with 2 diagrams generated in 0.005 s Total: 1 processes with 2 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -163,10 +163,10 @@ It has been validated for the last time with version: 3.5.2 INFO: initialize a new directory: CODEGEN_mad_ee_mumu INFO: remove old information in CODEGEN_mad_ee_mumu DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 @@ -182,13 +182,13 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.073 s 
+Wrote files for 8 helas calls in 0.075 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.206 s +ALOHA: aloha creates 3 routines in 0.211 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines @@ -203,24 +203,24 @@ ALOHA: aloha creates 7 routines in 0.262 s FFV4 FFV2_4 FFV2_4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. 
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -228,15 +228,15 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #2 succeeded at 236 (offset 22 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. 
+Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README Run "open index.html" to see more information about this process. quit -real 0m2.540s -user 0m1.832s -sys 0m0.262s +real 0m2.190s +user 0m1.811s +sys 0m0.293s Code generation completed in 2 seconds ************************************************************ * * @@ -258,9 +258,9 @@ Code generation completed in 2 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -288,9 +288,9 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt index 4f5079f78a..68b4c46295 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/ee_mumu.mad/Source/param_card.inc b/epochX/cudacpp/ee_mumu.mad/Source/param_card.inc index 081365c16b..1fcfce55bb 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/param_card.inc +++ b/epochX/cudacpp/ee_mumu.mad/Source/param_card.inc @@ -1,15 +1,15 @@ - MDL_MB = 4.700000D+00 - MDL_MT = 1.730000D+02 + MDL_WZ = 2.441404D+00 + MDL_WW = 2.047600D+00 + MDL_WH = 6.382339D-03 + MDL_WT = 1.491500D+00 MDL_MTA = 1.777000D+00 MDL_MZ = 9.118800D+01 MDL_MH = 1.250000D+02 + MDL_MB = 4.700000D+00 + MDL_MT = 1.730000D+02 AEWM1 = 1.325070D+02 MDL_GF = 1.166390D-05 AS = 1.180000D-01 + MDL_YMTAU = 1.777000D+00 MDL_YMB = 4.700000D+00 MDL_YMT = 1.730000D+02 - MDL_YMTAU = 1.777000D+00 - MDL_WT = 1.491500D+00 - MDL_WZ = 2.441404D+00 - MDL_WW = 2.047600D+00 - MDL_WH = 6.382339D-03 diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index fcd0d9109e..90b16d55ff 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -2,7 +2,6 @@ This version is intended for development/beta testing and NOT for production. 
This version has not been fully tested (if at all) and might have limited user support (if at all) Running MG5 in debug mode -('WARNING: loading of madgraph too slow!!!', 1.1174366474151611) ************************************************************ * * * W E L C O M E to * @@ -49,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -58,7 +57,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00561213493347168  +DEBUG: model prefixing takes 0.00577545166015625  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -159,7 +158,7 @@ It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 @@ -168,17 +167,17 @@ INFO: Processing color information for process: e+ e- > mu+ mu- @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 
'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.281 s +ALOHA: aloha creates 4 routines in 0.274 s FFV1 FFV1 FFV2 @@ -187,17 +186,17 @@ ALOHA: aloha creates 4 routines in 0.281 s FFV4 FFV2_4 FFV2_4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
quit -real 0m1.777s -user 0m1.371s -sys 0m0.095s -Code generation completed in 2 seconds +real 0m0.708s +user 0m0.606s +sys 0m0.059s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 24e5bbd249..5883f45027 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005617618560791016  +DEBUG: model prefixing takes 0.005657672882080078  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -192,7 +192,7 @@ ALOHA: aloha creates 2 routines in 0.149 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.137 s +ALOHA: aloha creates 4 routines in 0.138 s VVV1 FFV1 FFV1 @@ -228,9 +228,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.049s -user 0m1.655s -sys 0m0.268s +real 0m1.927s +user 0m1.669s +sys 0m0.259s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 0ee963873f..73ac7d8cdc 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005754947662353516  +DEBUG: model prefixing takes 0.005669593811035156  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -159,7 +159,7 @@ It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: 
g g > t t~ @1 @@ -168,30 +168,30 @@ INFO: Processing color information for process: g g > t t~ @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. 
+INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.147 s +ALOHA: aloha creates 2 routines in 0.149 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. quit -real 0m0.547s -user 0m0.488s -sys 0m0.047s -Code generation completed in 0 seconds +real 0m0.550s +user 0m0.483s +sys 0m0.062s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index b05f9d17ae..b0273fb24d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005604743957519531  +DEBUG: model prefixing takes 0.0056915283203125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,10 +172,10 @@ It has been validated for the last time with version: 3.5.2 INFO: initialize a new directory: CODEGEN_mad_gg_tt01g INFO: remove old information in CODEGEN_mad_gg_tt01g DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 
@2 INFO: Processing color information for process: g g > t t~ g @2 @@ -203,7 +203,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  -Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s Wrote files for 46 helas calls in 0.192 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes @@ -212,14 +212,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.332 s +ALOHA: aloha creates 5 routines in 0.337 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.318 s +ALOHA: aloha creates 10 routines in 0.322 s VVV1 VVV1 FFV1 @@ -229,31 +229,31 @@ ALOHA: aloha creates 10 routines in 0.318 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #2 succeeded at 227 (offset 13 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -261,15 +261,15 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #2 succeeded at 243 (offset 29 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README Run "open index.html" to see more information about this process. quit -real 0m2.620s -user 0m2.318s -sys 0m0.298s +real 0m2.705s +user 0m2.357s +sys 0m0.302s Code generation completed in 3 seconds ************************************************************ * * @@ -291,9 +291,9 @@ Code generation completed in 3 seconds * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -321,9 +321,9 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt index 4f5079f78a..68b4c46295 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/param_card.inc b/epochX/cudacpp/gg_tt01g.mad/Source/param_card.inc index 081365c16b..1fcfce55bb 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/param_card.inc +++ b/epochX/cudacpp/gg_tt01g.mad/Source/param_card.inc @@ -1,15 +1,15 @@ - MDL_MB = 4.700000D+00 - MDL_MT = 1.730000D+02 + MDL_WZ = 2.441404D+00 + MDL_WW = 2.047600D+00 + MDL_WH = 6.382339D-03 + MDL_WT = 1.491500D+00 MDL_MTA = 1.777000D+00 MDL_MZ = 9.118800D+01 MDL_MH = 1.250000D+02 + MDL_MB = 4.700000D+00 + MDL_MT = 1.730000D+02 AEWM1 = 1.325070D+02 MDL_GF = 1.166390D-05 AS = 1.180000D-01 + MDL_YMTAU = 1.777000D+00 MDL_YMB = 4.700000D+00 MDL_YMT = 1.730000D+02 - MDL_YMTAU = 1.777000D+00 - MDL_WT = 1.491500D+00 - MDL_WZ = 2.441404D+00 - MDL_WW = 2.047600D+00 - MDL_WH = 6.382339D-03 diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index f2e4616cd9..647dfb69b1 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -57,7 +57,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00537419319152832  +DEBUG: model prefixing takes 0.005728483200073242  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -164,10 +164,10 @@ It has been validated for the last time with version: 3.5.2 INFO: initialize a new directory: CODEGEN_mad_gg_ttg INFO: remove old information in CODEGEN_mad_gg_ttg DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  INFO: Organizing processes into subprocess groups 
INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 @@ -182,8 +182,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxg DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s -Wrote files for 36 helas calls in 0.122 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +Wrote files for 36 helas calls in 0.124 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -191,14 +191,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.334 s +ALOHA: aloha creates 5 routines in 0.339 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.320 s +ALOHA: aloha creates 10 routines in 0.325 s VVV1 VVV1 FFV1 @@ -208,24 +208,24 @@ ALOHA: aloha creates 10 routines in 0.320 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory 
/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -233,16 +233,16 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #2 succeeded at 243 (offset 29 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README Run "open index.html" to see more information about this process. quit -real 0m2.478s -user 0m2.208s -sys 0m0.251s -Code generation completed in 2 seconds +real 0m2.510s +user 0m2.214s +sys 0m0.285s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * @@ -263,9 +263,9 @@ Code generation completed in 2 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -293,9 +293,9 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt index 4f5079f78a..68b4c46295 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttg.mad/Source/param_card.inc b/epochX/cudacpp/gg_ttg.mad/Source/param_card.inc index 081365c16b..1fcfce55bb 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/param_card.inc +++ b/epochX/cudacpp/gg_ttg.mad/Source/param_card.inc @@ -1,15 +1,15 @@ - MDL_MB = 4.700000D+00 - MDL_MT = 1.730000D+02 + MDL_WZ = 2.441404D+00 + MDL_WW = 2.047600D+00 + MDL_WH = 6.382339D-03 + MDL_WT = 1.491500D+00 MDL_MTA = 1.777000D+00 MDL_MZ = 9.118800D+01 MDL_MH = 1.250000D+02 + MDL_MB = 4.700000D+00 + MDL_MT = 1.730000D+02 AEWM1 = 1.325070D+02 MDL_GF = 1.166390D-05 AS = 1.180000D-01 + MDL_YMTAU = 1.777000D+00 MDL_YMB = 4.700000D+00 MDL_YMT = 1.730000D+02 - MDL_YMTAU = 1.777000D+00 - MDL_WT = 1.491500D+00 - MDL_WZ = 2.441404D+00 - MDL_WW = 2.047600D+00 - MDL_WH = 6.382339D-03 diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index c0f0257969..70c71a74a1 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -57,7 +57,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005707740783691406  +DEBUG: model prefixing takes 0.005589723587036133  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -159,7 +159,7 @@ It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 @@ -168,10 +168,10 @@ INFO: Processing color information for process: g g > t t~ g @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 
'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
+INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -179,7 +179,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.334 s +ALOHA: aloha creates 5 routines in 0.341 s VVV1 VVV1 FFV1 @@ -189,17 +189,17 @@ ALOHA: aloha creates 5 routines in 0.334 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. quit -real 0m0.791s -user 0m0.726s -sys 0m0.056s +real 0m0.803s +user 0m0.748s +sys 0m0.049s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 65f27cc918..fd4150a8d6 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -57,7 +57,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005572319030761719  +DEBUG: model prefixing takes 0.005757808685302734  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.164 s +1 processes with 123 diagrams generated in 0.166 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -164,10 +164,10 @@ It has been validated for the last time with version: 3.5.2 INFO: initialize a new directory: CODEGEN_mad_gg_ttgg INFO: remove old information in CODEGEN_mad_gg_ttgg DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  +WARNING: File exists 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 @@ -182,8 +182,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxgg DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 
43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.437 s -Wrote files for 222 helas calls in 0.682 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.449 s +Wrote files for 222 helas calls in 0.693 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -191,14 +191,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.341 s +ALOHA: aloha creates 5 routines in 0.350 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.322 s +ALOHA: aloha creates 10 routines in 0.329 s VVV1 VVV1 FFV1 @@ -211,24 +211,24 @@ ALOHA: aloha creates 10 routines in 0.322 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -236,15 +236,15 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #2 succeeded at 275 (offset 61 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README Run "open index.html" to see more information about this process. quit -real 0m3.904s -user 0m3.584s -sys 0m0.268s +real 0m3.932s +user 0m3.633s +sys 0m0.291s Code generation completed in 4 seconds ************************************************************ * * @@ -266,9 +266,9 @@ Code generation completed in 4 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -296,9 +296,9 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt index 4f5079f78a..68b4c46295 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/param_card.inc b/epochX/cudacpp/gg_ttgg.mad/Source/param_card.inc index 081365c16b..1fcfce55bb 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/param_card.inc +++ b/epochX/cudacpp/gg_ttgg.mad/Source/param_card.inc @@ -1,15 +1,15 @@ - MDL_MB = 4.700000D+00 - MDL_MT = 1.730000D+02 + MDL_WZ = 2.441404D+00 + MDL_WW = 2.047600D+00 + MDL_WH = 6.382339D-03 + MDL_WT = 1.491500D+00 MDL_MTA = 1.777000D+00 MDL_MZ = 9.118800D+01 MDL_MH = 1.250000D+02 + MDL_MB = 4.700000D+00 + MDL_MT = 1.730000D+02 AEWM1 = 1.325070D+02 MDL_GF = 1.166390D-05 AS = 1.180000D-01 + MDL_YMTAU = 1.777000D+00 MDL_YMB = 4.700000D+00 MDL_YMT = 1.730000D+02 - MDL_YMTAU = 1.777000D+00 - MDL_WT = 1.491500D+00 - MDL_WZ = 2.441404D+00 - MDL_WW = 2.047600D+00 - MDL_WH = 6.382339D-03 diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 837f944753..72dce09a64 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -57,7 +57,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005681276321411133  +DEBUG: model prefixing takes 0.005761146545410156  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.161 s +1 processes with 123 diagrams generated in 0.165 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -159,7 +159,7 @@ It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing 
color information for process: g g > t t~ g g @1 @@ -168,18 +168,18 @@ INFO: Processing color information for process: g g > t t~ g g @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.436 s +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. +Generated helas calls for 1 subprocesses (123 diagrams) in 0.438 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.323 s +ALOHA: aloha creates 5 routines in 0.332 s VVV1 VVV1 FFV1 @@ -192,17 +192,17 @@ ALOHA: aloha creates 5 routines in 0.323 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. quit -real 0m1.468s -user 0m1.395s -sys 0m0.062s -Code generation completed in 1 seconds +real 0m1.489s +user 0m1.409s +sys 0m0.072s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 2b142b4e41..0f10271b3c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -57,7 +57,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005638599395751953  +DEBUG: model prefixing takes 0.0057866573333740234  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.917 s +1 processes with 1240 diagrams generated in 1.951 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -164,10 +164,10 @@ It has been validated for the last time with version: 3.5.2 INFO: initialize a new directory: CODEGEN_mad_gg_ttggg INFO: remove old information in CODEGEN_mad_gg_ttggg DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  +WARNING: File exists 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 @@ -184,8 +184,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxggg DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 
163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 
398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 
633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 
868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 
934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 
201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 
401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 
601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 
801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.738 s -Wrote files for 2281 helas calls in 18.801 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.778 s +Wrote files for 2281 helas calls in 19.046 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -193,14 +193,14 @@ ALOHA: aloha creates FFV1 
routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.329 s +ALOHA: aloha creates 5 routines in 0.331 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.321 s +ALOHA: aloha creates 10 routines in 0.325 s VVV1 VVV1 FFV1 @@ -213,24 +213,24 @@ ALOHA: aloha creates 10 routines in 0.321 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. 
and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -238,16 +238,16 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #2 succeeded at 339 (offset 125 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. 
+Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README Run "open index.html" to see more information about this process. quit -real 0m33.303s -user 0m32.735s -sys 0m0.449s -Code generation completed in 33 seconds +real 0m33.700s +user 0m33.094s +sys 0m0.485s +Code generation completed in 34 seconds ************************************************************ * * * W E L C O M E to * @@ -268,9 +268,9 @@ Code generation completed in 33 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -298,9 +298,9 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt index 4f5079f78a..68b4c46295 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/param_card.inc b/epochX/cudacpp/gg_ttggg.mad/Source/param_card.inc index 081365c16b..1fcfce55bb 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/param_card.inc +++ b/epochX/cudacpp/gg_ttggg.mad/Source/param_card.inc @@ -1,15 +1,15 @@ - MDL_MB = 4.700000D+00 - MDL_MT = 1.730000D+02 + MDL_WZ = 2.441404D+00 + MDL_WW = 2.047600D+00 + MDL_WH = 6.382339D-03 + MDL_WT = 1.491500D+00 MDL_MTA = 1.777000D+00 MDL_MZ = 9.118800D+01 MDL_MH = 1.250000D+02 + MDL_MB = 4.700000D+00 + MDL_MT = 1.730000D+02 AEWM1 = 1.325070D+02 MDL_GF = 1.166390D-05 AS = 1.180000D-01 + MDL_YMTAU = 1.777000D+00 MDL_YMB = 4.700000D+00 MDL_YMT = 1.730000D+02 - MDL_YMTAU = 1.777000D+00 - MDL_WT = 1.491500D+00 - MDL_WZ = 2.441404D+00 - MDL_WW = 2.047600D+00 - MDL_WH = 6.382339D-03 diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 319ce47773..4a3ba8518e 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -57,7 +57,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005609273910522461  +DEBUG: model prefixing takes 0.005571842193603516  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.925 s +1 processes with 1240 diagrams generated in 1.935 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -159,7 +159,7 @@ It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 
INFO: Processing color information for process: g g > t t~ g g g @1 @@ -168,18 +168,18 @@ INFO: Processing color information for process: g g > t t~ g g g @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.699 s +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.885 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.354 s +ALOHA: aloha creates 5 routines in 0.364 s VVV1 VVV1 FFV1 @@ -192,17 +192,17 @@ ALOHA: aloha creates 5 routines in 0.354 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. quit -real 0m13.246s -user 0m13.071s -sys 0m0.121s -Code generation completed in 13 seconds +real 0m13.596s +user 0m13.408s +sys 0m0.113s +Code generation completed in 14 seconds diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index ac9431cf42..d3e9530428 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -56,7 +56,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056684017181396484  +DEBUG: model prefixing takes 0.0056192874908447266  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -165,7 +165,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.080 s +8 processes with 40 diagrams generated in 0.081 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -179,10 +179,10 @@ It has been validated for the last time with version: 3.5.2 INFO: initialize a new directory: CODEGEN_mad_gq_ttq INFO: remove old information in CODEGEN_mad_gq_ttq DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  +WARNING: File exists 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ -216,8 +216,8 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxux DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1548]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.167 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s +Wrote files for 32 helas calls in 0.168 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines @@ -232,24 +232,24 @@ ALOHA: aloha creates 4 routines in 0.135 s FFV1 FFV1 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -257,7 +257,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 74 (offset 3 lines). Hunk #2 succeeded at 254 (offset 40 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -266,16 +266,16 @@ patching file matrix1.f Hunk #1 succeeded at 74 (offset 3 lines). Hunk #2 succeeded at 254 (offset 40 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README Run "open index.html" to see more information about this process. quit -real 0m2.217s -user 0m1.918s -sys 0m0.291s -Code generation completed in 2 seconds +real 0m2.234s +user 0m1.920s +sys 0m0.312s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * @@ -296,9 +296,9 @@ Code generation completed in 2 seconds * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -326,9 +326,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt index 4f5079f78a..68b4c46295 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gq_ttq.mad/Source/param_card.inc b/epochX/cudacpp/gq_ttq.mad/Source/param_card.inc index 081365c16b..1fcfce55bb 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/param_card.inc +++ b/epochX/cudacpp/gq_ttq.mad/Source/param_card.inc @@ -1,15 +1,15 @@ - MDL_MB = 4.700000D+00 - MDL_MT = 1.730000D+02 + MDL_WZ = 2.441404D+00 + MDL_WW = 2.047600D+00 + MDL_WH = 6.382339D-03 + MDL_WT = 1.491500D+00 MDL_MTA = 1.777000D+00 MDL_MZ = 9.118800D+01 MDL_MH = 1.250000D+02 + MDL_MB = 4.700000D+00 + MDL_MT = 1.730000D+02 AEWM1 = 1.325070D+02 MDL_GF = 1.166390D-05 AS = 1.180000D-01 + MDL_YMTAU = 1.777000D+00 MDL_YMB = 4.700000D+00 MDL_YMT = 1.730000D+02 - MDL_YMTAU = 1.777000D+00 - MDL_WT = 1.491500D+00 - MDL_WZ = 2.441404D+00 - MDL_WW = 2.047600D+00 - MDL_WH = 6.382339D-03 diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 7f98293b71..db04f926c4 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs 
Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -56,7 +56,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057294368743896484  +DEBUG: model prefixing takes 0.005569934844970703  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -165,7 +165,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.080 s +8 processes with 40 diagrams generated in 0.083 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -174,7 +174,7 @@ It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ -191,40 +191,40 @@ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 
'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. 
DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 212]  DEBUG: type(subproc_group)= [output.py at line 213]  DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=1 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.147 s +ALOHA: aloha creates 2 routines in 0.149 s FFV1 FFV1 FFV1 FFV1 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. quit -real 0m0.658s -user 0m0.600s -sys 0m0.052s +real 0m0.778s +user 0m0.610s +sys 0m0.054s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 0d7d52c915..6a7dd15b25 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -48,14 +48,14 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T save options auto_convert_model -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model heft INFO: Restrict model heft with file models/heft/restrict_default.dat . DEBUG: Simplifying conditional expressions  @@ -136,10 +136,10 @@ It has been validated for the last time with version: 3.5.2 INFO: initialize a new directory: CODEGEN_mad_heft_gg_bb INFO: remove old information in CODEGEN_mad_heft_gg_bb DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses  INFO: Organizing processes into subprocess groups INFO: 
Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 @@ -162,37 +162,37 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.270 s +ALOHA: aloha creates 4 routines in 0.274 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.257 s +ALOHA: aloha creates 8 routines in 0.259 s VVS3 VVV1 FFV1 FFV1 FFV1 FFS2 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./HelAmps_heft.h -INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./HelAmps_heft.h +INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.cc INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. 
and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses/P1_gg_bbx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses/P1_gg_bbx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -200,15 +200,15 @@ Hunk #3 succeeded at 489 (offset 13 lines). 
patching file matrix1.f Hunk #2 succeeded at 227 (offset 13 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/README Run "open index.html" to see more information about this process. quit -real 0m2.177s -user 0m1.895s -sys 0m0.262s +real 0m2.197s +user 0m1.912s +sys 0m0.271s Code generation completed in 2 seconds ************************************************************ * * @@ -230,9 +230,9 @@ Code generation completed in 2 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt @@ -260,9 +260,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/me5_configuration.txt b/epochX/cudacpp/heft_gg_bb.mad/Cards/me5_configuration.txt index 4f5079f78a..68b4c46295 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/param_card.inc b/epochX/cudacpp/heft_gg_bb.mad/Source/param_card.inc index 02c21b3865..eb88c9261d 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/param_card.inc +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/param_card.inc @@ -1,17 +1,17 @@ - MDL_MB = 4.700000D+00 - MDL_MT = 1.730000D+02 + MDL_WZ = 2.441404D+00 + MDL_WW = 2.047600D+00 + MDL_WH = 6.382339D-03 + MDL_WT = 1.491500D+00 + MDL_WH1 = 6.382339D-03 MDL_MTA = 1.777000D+00 MDL_MZ = 9.118800D+01 MDL_MH = 1.250000D+02 + MDL_MB = 4.700000D+00 + MDL_MT = 1.730000D+02 MDL_MP = 1.250001D+02 AEWM1 = 1.325070D+02 MDL_GF = 1.166390D-05 AS = 1.180000D-01 + MDL_YMTAU = 1.777000D+00 MDL_YMB = 4.200000D+00 MDL_YMT = 1.645000D+02 - MDL_YMTAU = 1.777000D+00 - MDL_WT = 1.491500D+00 - MDL_WZ = 2.441404D+00 - MDL_WW = 2.047600D+00 - MDL_WH = 6.382339D-03 - MDL_WH1 = 6.382339D-03 diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index 344efcb222..e629afa7b0 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -48,20 +48,20 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T save options auto_convert_model -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model heft INFO: load particles INFO: load vertices WARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  WARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  -DEBUG: model prefixing takes 0.006150245666503906  +DEBUG: model prefixing takes 0.006257057189941406  INFO: Restrict model heft with file models/heft/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -136,7 +136,7 @@ It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 @@ -145,34 +145,34 @@ INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 
'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/. +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/. 
Generated helas calls for 1 subprocesses (4 diagrams) in 0.008 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.265 s +ALOHA: aloha creates 4 routines in 0.269 s VVS3 VVV1 FFV1 FFV1 FFV1 FFS2 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./HelAmps_heft.h -INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./HelAmps_heft.h +INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.cc INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
quit -real 0m0.668s -user 0m0.604s -sys 0m0.052s -Code generation completed in 0 seconds +real 0m0.678s +user 0m0.624s +sys 0m0.044s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt index f536ab73a6..5a19d6bcfd 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -56,7 +56,7 @@ set zerowidth_tchannel F import model sm-no_b_mass INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005758047103881836  +DEBUG: model prefixing takes 0.0057599544525146484  INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -222,7 +222,7 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~ INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g -12 processes with 144 diagrams generated in 0.661 s +12 processes with 144 diagrams generated in 0.666 s Total: 16 processes with 152 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -236,10 +236,10 @@ It has been validated for the last time with version: 3.5.2 INFO: initialize a new directory: CODEGEN_mad_nobm_pp_ttW INFO: remove old information in CODEGEN_mad_nobm_pp_ttW DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ w+ d WEIGHTED<=5 @1 INFO: Processing color 
information for process: g u > t t~ w+ d @1 @@ -354,13 +354,13 @@ INFO: Finding symmetric diagrams for subprocess group dux_ttxwm DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548]  Generated helas calls for 8 subprocesses (76 diagrams) in 0.206 s -Wrote files for 212 helas calls in 0.836 s +Wrote files for 212 helas calls in 0.845 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 3 routines in 0.207 s +ALOHA: aloha creates 3 routines in 0.209 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines @@ -373,24 +373,24 @@ ALOHA: aloha creates 6 routines in 0.206 s FFV2 FFV2 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./HelAmps_sm_no_b_mass.h -INFO: Created file HelAmps_sm_no_b_mass.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./HelAmps_sm_no_b_mass.h +INFO: Created file HelAmps_sm_no_b_mass.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.cc INFO: Created files Parameters_sm_no_b_mass.h and Parameters_sm_no_b_mass.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P0_dux_ttxwm; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P0_dux_ttxwm; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -398,7 +398,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 72 (offset 1 line). Hunk #2 succeeded at 268 (offset 54 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P0_udx_ttxwp; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P0_udx_ttxwp; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -406,7 +406,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 72 (offset 1 line). Hunk #2 succeeded at 268 (offset 54 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_dux_ttxwmg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_dux_ttxwmg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -414,7 +414,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 72 (offset 1 line). Hunk #2 succeeded at 316 (offset 102 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gd_ttxwmu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gd_ttxwmu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -422,7 +422,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 72 (offset 1 line). Hunk #2 succeeded at 316 (offset 102 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gdx_ttxwpux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gdx_ttxwpux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -430,7 +430,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 72 (offset 1 line). Hunk #2 succeeded at 316 (offset 102 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gu_ttxwpd; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gu_ttxwpd; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -438,7 +438,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 72 (offset 1 line). Hunk #2 succeeded at 316 (offset 102 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gux_ttxwmdx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gux_ttxwmdx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -446,7 +446,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 72 (offset 1 line). Hunk #2 succeeded at 316 (offset 102 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_udx_ttxwpg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_udx_ttxwpg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -455,15 +455,15 @@ patching file matrix1.f Hunk #1 succeeded at 72 (offset 1 line). Hunk #2 succeeded at 316 (offset 102 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/README Run "open index.html" to see more information about this process. quit -real 0m4.712s -user 0m4.184s -sys 0m0.513s +real 0m5.337s +user 0m4.211s +sys 0m0.535s Code generation completed in 5 seconds ************************************************************ * * @@ -485,9 +485,9 @@ Code generation completed in 5 seconds * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -515,9 +515,9 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/me5_configuration.txt b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/me5_configuration.txt index 4f5079f78a..68b4c46295 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/param_card.inc b/epochX/cudacpp/nobm_pp_ttW.mad/Source/param_card.inc index ffbccd5a97..6c431f35ad 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/param_card.inc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/param_card.inc @@ -1,13 +1,13 @@ - MDL_MT = 1.730000D+02 + MDL_WZ = 2.441404D+00 + MDL_WW = 2.047600D+00 + MDL_WH = 6.382339D-03 + MDL_WT = 1.491500D+00 MDL_MTA = 1.777000D+00 MDL_MZ = 9.118800D+01 MDL_MH = 1.250000D+02 + MDL_MT = 1.730000D+02 AEWM1 = 1.325070D+02 MDL_GF = 1.166390D-05 AS = 1.180000D-01 - MDL_YMT = 1.730000D+02 MDL_YMTAU = 1.777000D+00 - MDL_WT = 1.491500D+00 - MDL_WZ = 2.441404D+00 - MDL_WW = 2.047600D+00 - MDL_WH = 6.382339D-03 + MDL_YMT = 1.730000D+02 diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 133a8566c1..bb75e72b2c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". 
Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -56,7 +56,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005753755569458008  +DEBUG: model prefixing takes 0.00578761100769043  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -373,7 +373,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.869 s +65 processes with 1119 diagrams generated in 1.915 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -387,10 +387,10 @@ It has been validated for the last time with version: 3.5.2 INFO: initialize a new directory: CODEGEN_mad_pp_tt012j INFO: remove old information in CODEGEN_mad_pp_tt012j DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Processing color information for process: g g > t t~ g g @2 @@ -691,8 +691,8 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttx DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1548]  -Generated helas calls for 18 subprocesses (372 diagrams) in 1.318 s -Wrote files for 810 helas 
calls in 2.818 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.340 s +Wrote files for 810 helas calls in 2.854 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -700,14 +700,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.346 s +ALOHA: aloha creates 5 routines in 0.349 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.326 s +ALOHA: aloha creates 10 routines in 0.330 s VVV1 VVV1 FFV1 @@ -720,31 +720,31 @@ ALOHA: aloha creates 10 routines in 0.326 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #2 succeeded at 227 (offset 13 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -752,14 +752,14 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 74 (offset 3 lines). Hunk #2 succeeded at 230 (offset 16 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #2 succeeded at 243 (offset 29 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). 
Hunk #2 succeeded at 280 (offset 8 lines). @@ -767,7 +767,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 74 (offset 3 lines). Hunk #2 succeeded at 246 (offset 32 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -775,7 +775,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 74 (offset 3 lines). Hunk #2 succeeded at 246 (offset 32 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -783,14 +783,14 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 74 (offset 3 lines). Hunk #2 succeeded at 246 (offset 32 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #2 succeeded at 275 (offset 61 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -798,7 +798,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 74 (offset 3 lines). Hunk #2 succeeded at 278 (offset 64 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -806,7 +806,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 74 (offset 3 lines). Hunk #2 succeeded at 278 (offset 64 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -814,7 +814,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 74 (offset 3 lines). Hunk #2 succeeded at 278 (offset 64 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -822,7 +822,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 76 (offset 5 lines). Hunk #2 succeeded at 280 (offset 66 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -830,7 +830,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 82 (offset 11 lines). Hunk #2 succeeded at 286 (offset 72 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -838,7 +838,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 74 (offset 3 lines). Hunk #2 succeeded at 278 (offset 64 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -846,7 +846,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 82 (offset 11 lines). Hunk #2 succeeded at 286 (offset 72 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -854,7 +854,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 74 (offset 3 lines). Hunk #2 succeeded at 278 (offset 64 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -862,7 +862,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 74 (offset 3 lines). Hunk #2 succeeded at 278 (offset 64 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -870,7 +870,7 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #1 succeeded at 76 (offset 5 lines). Hunk #2 succeeded at 280 (offset 66 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -879,16 +879,16 @@ patching file matrix1.f Hunk #1 succeeded at 74 (offset 3 lines). Hunk #2 succeeded at 278 (offset 64 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README Run "open index.html" to see more information about this process. quit -real 0m10.632s -user 0m9.727s -sys 0m0.868s -Code generation completed in 11 seconds +real 0m11.928s +user 0m9.873s +sys 0m0.901s +Code generation completed in 12 seconds ************************************************************ * * * W E L C O M E to * @@ -909,9 +909,9 @@ Code generation completed in 11 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -939,9 +939,9 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt index 4f5079f78a..68b4c46295 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/param_card.inc b/epochX/cudacpp/pp_tt012j.mad/Source/param_card.inc index 081365c16b..1fcfce55bb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/param_card.inc +++ b/epochX/cudacpp/pp_tt012j.mad/Source/param_card.inc @@ -1,15 +1,15 @@ - MDL_MB = 4.700000D+00 - MDL_MT = 1.730000D+02 + MDL_WZ = 2.441404D+00 + MDL_WW = 2.047600D+00 + MDL_WH = 6.382339D-03 + MDL_WT = 1.491500D+00 MDL_MTA = 1.777000D+00 MDL_MZ = 9.118800D+01 MDL_MH = 1.250000D+02 + MDL_MB = 4.700000D+00 + MDL_MT = 1.730000D+02 AEWM1 = 1.325070D+02 MDL_GF = 1.166390D-05 AS = 1.180000D-01 + MDL_YMTAU = 1.777000D+00 MDL_YMB = 4.700000D+00 MDL_YMT = 1.730000D+02 - MDL_YMTAU = 1.777000D+00 - MDL_WT = 1.491500D+00 - MDL_WZ = 2.441404D+00 - MDL_WW = 2.047600D+00 - MDL_WH = 6.382339D-03 diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index 85e696efb7..b14bad49de 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -48,14 +48,14 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T save options auto_convert_model -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t INFO: load particles INFO: load vertices @@ -72,7 +72,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14330029487609863  +DEBUG: model prefixing takes 0.14215660095214844  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -87,7 +87,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.816 s +1 processes with 72 diagrams generated in 3.819 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -101,10 +101,10 @@ It has been validated for the last time with version: 3.5.2 INFO: initialize a new directory: CODEGEN_mad_smeft_gg_tttt INFO: remove old information in CODEGEN_mad_smeft_gg_tttt DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt -WARNING: File exists 
/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 @@ -119,8 +119,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxttx DEBUG: len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 
50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s -Wrote files for 119 helas calls in 0.396 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.195 s +Wrote files for 119 helas calls in 0.400 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -128,14 +128,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.327 s +ALOHA: aloha creates 5 routines in 0.330 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.336 s +ALOHA: aloha creates 10 routines in 0.342 s VVV5 VVV5 FFV1 @@ -145,24 +145,24 @@ ALOHA: aloha creates 10 routines in 0.336 s VVVV1 VVVV9 VVVV10 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h -INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h +INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses/P1_gg_ttxttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses/P1_gg_ttxttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -170,16 +170,16 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #2 succeeded at 275 (offset 61 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/README Run "open index.html" to see more information about this process. quit -real 0m7.349s -user 0m6.999s -sys 0m0.311s -Code generation completed in 7 seconds +real 0m7.523s +user 0m7.077s +sys 0m0.280s +Code generation completed in 8 seconds ************************************************************ * * * W E L C O M E to * @@ -200,9 +200,9 @@ Code generation completed in 7 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -230,9 +230,9 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/me5_configuration.txt b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/me5_configuration.txt index 4f5079f78a..68b4c46295 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/param_card.inc b/epochX/cudacpp/smeft_gg_tttt.mad/Source/param_card.inc index 2491ed20c1..dab3aac603 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/param_card.inc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/param_card.inc @@ -1,25 +1,50 @@ + MDL_WZ = 2.495200D+00 + MDL_WW = 2.085000D+00 + MDL_WH = 4.070000D-03 + MDL_WT = 1.330000D+00 + MDL_ME = 5.110000D-04 + MDL_MMU = 1.056600D-01 + MDL_MTA = 1.777000D+00 MDL_MD = 4.670000D-03 + MDL_MZ = 9.118760D+01 + MDL_MH = 1.250900D+02 MDL_MU = 2.160000D-03 MDL_MS = 9.300000D-02 MDL_MC = 1.270000D+00 MDL_MB = 4.180000D+00 MDL_MT = 1.727600D+02 - MDL_ME = 5.110000D-04 - MDL_MMU = 1.056600D-01 - MDL_MTA = 1.777000D+00 - MDL_MZ = 9.118760D+01 - MDL_MH = 1.250900D+02 - MDL_CG = 0.000000D+00 - MDL_CW = 0.000000D+00 - MDL_CH = 0.000000D+00 - MDL_CHBOX = 0.000000D+00 - MDL_CHDD = 0.000000D+00 - MDL_CHG = 0.000000D+00 - MDL_CHW = 0.000000D+00 - MDL_CHB = 0.000000D+00 - MDL_CHWB = 0.000000D+00 + MDL_CEHRE = 0.000000D+00 + MDL_CEWRE = 0.000000D+00 + MDL_CEBRE = 0.000000D+00 + MDL_CHL1 = 0.000000D+00 + MDL_CHL3 = 0.000000D+00 + MDL_CHE = 0.000000D+00 + MDL_CLL = 0.000000D+00 + MDL_CLL1 = 0.000000D+00 + MDL_CLJ1 = 0.000000D+00 + MDL_CLJ3 = 0.000000D+00 MDL_CUHRE = 0.000000D+00 + MDL_CQL1 = 
0.000000D+00 + MDL_CQL3 = 0.000000D+00 + MDL_CEE = 0.000000D+00 + MDL_CEU = 0.000000D+00 + MDL_CTE = 0.000000D+00 + MDL_CED = 0.000000D+00 + MDL_CBE = 0.000000D+00 + MDL_CJE = 0.000000D+00 + MDL_CQE = 0.000000D+00 + MDL_CLU = 0.000000D+00 MDL_CTHRE = 0.000000D+00 + MDL_CTL = 0.000000D+00 + MDL_CLD = 0.000000D+00 + MDL_CBL = 0.000000D+00 + MDL_CLE = 0.000000D+00 + MDL_CLEDJRE = 0.000000D+00 + MDL_CLEBQRE = 0.000000D+00 + MDL_CLEJU1RE = 0.000000D+00 + MDL_CLEQT1RE = 0.000000D+00 + MDL_CLEJU3RE = 0.000000D+00 + MDL_CLEQT3RE = 0.000000D+00 MDL_CDHRE = 0.000000D+00 MDL_CBHRE = 0.000000D+00 MDL_CUGRE = 0.000000D+00 @@ -28,6 +53,7 @@ MDL_CTWRE = 0.000000D+00 MDL_CUBRE = 0.000000D+00 MDL_CTBRE = 0.000000D+00 + MDL_CG = 0.000000D+00 MDL_CDGRE = 0.000000D+00 MDL_CBGRE = 0.000000D+00 MDL_CDWRE = 0.000000D+00 @@ -38,6 +64,7 @@ MDL_CHQ1 = 0.000000D+00 MDL_CHJ3 = 0.000000D+00 MDL_CHQ3 = 0.000000D+00 + MDL_CW = 0.000000D+00 MDL_CHU = 0.000000D+00 MDL_CHT = 0.000000D+00 MDL_CHD = 0.000000D+00 @@ -48,6 +75,7 @@ MDL_CJJ18 = 0.000000D+00 MDL_CJJ31 = 0.000000D+00 MDL_CJJ38 = 0.000000D+00 + MDL_CH = 0.000000D+00 MDL_CQJ11 = 0.000000D+00 MDL_CQJ18 = 0.000000D+00 MDL_CQJ31 = 0.000000D+00 @@ -58,6 +86,7 @@ MDL_CUU8 = 0.000000D+00 MDL_CTT = 0.000000D+00 MDL_CTU1 = 0.000000D+00 + MDL_CHBOX = 0.000000D+00 MDL_CTU8 = 0.000000D+00 MDL_CDD1 = 0.000000D+00 MDL_CDD8 = 0.000000D+00 @@ -68,6 +97,7 @@ MDL_CTB1 = 0.000000D+00 MDL_CTD1 = 0.000000D+00 MDL_CBU1 = 0.000000D+00 + MDL_CHDD = 0.000000D+00 MDL_CUD8 = 0.000000D+00 MDL_CTB8 = 0.000000D+00 MDL_CTD8 = 0.000000D+00 @@ -78,6 +108,7 @@ MDL_CQU1 = 0.000000D+00 MDL_CJU8 = 0.000000D+00 MDL_CQU8 = 0.000000D+00 + MDL_CHG = 0.000000D+00 MDL_CTJ1 = 0.000000D+00 MDL_CTJ8 = 0.000000D+00 MDL_CQT1 = 0.000000D+00 @@ -88,6 +119,7 @@ MDL_CQD8 = 0.000000D+00 MDL_CBJ1 = 0.000000D+00 MDL_CBJ8 = 0.000000D+00 + MDL_CHW = 0.000000D+00 MDL_CQB1 = 0.000000D+00 MDL_CQB8 = 0.000000D+00 MDL_CJQTU1RE = 0.000000D+00 @@ -98,6 +130,7 @@ MDL_CJUJD8RE = 0.000000D+00 
MDL_CJUJD11RE = 0.000000D+00 MDL_CJUJD81RE = 0.000000D+00 + MDL_CHB = 0.000000D+00 MDL_CQTJD1RE = 0.000000D+00 MDL_CQTJD8RE = 0.000000D+00 MDL_CJUQB1RE = 0.000000D+00 @@ -108,45 +141,7 @@ MDL_CJTQD8RE = 0.000000D+00 MDL_CQTQB1RE = 0.000000D+00 MDL_CQTQB8RE = 0.000000D+00 - MDL_CEHRE = 0.000000D+00 - MDL_CEWRE = 0.000000D+00 - MDL_CEBRE = 0.000000D+00 - MDL_CHL1 = 0.000000D+00 - MDL_CHL3 = 0.000000D+00 - MDL_CHE = 0.000000D+00 - MDL_CLL = 0.000000D+00 - MDL_CLL1 = 0.000000D+00 - MDL_CLJ1 = 0.000000D+00 - MDL_CLJ3 = 0.000000D+00 - MDL_CQL1 = 0.000000D+00 - MDL_CQL3 = 0.000000D+00 - MDL_CEE = 0.000000D+00 - MDL_CEU = 0.000000D+00 - MDL_CTE = 0.000000D+00 - MDL_CED = 0.000000D+00 - MDL_CBE = 0.000000D+00 - MDL_CJE = 0.000000D+00 - MDL_CQE = 0.000000D+00 - MDL_CLU = 0.000000D+00 - MDL_CTL = 0.000000D+00 - MDL_CLD = 0.000000D+00 - MDL_CBL = 0.000000D+00 - MDL_CLE = 0.000000D+00 - MDL_CLEDJRE = 0.000000D+00 - MDL_CLEBQRE = 0.000000D+00 - MDL_CLEJU1RE = 0.000000D+00 - MDL_CLEQT1RE = 0.000000D+00 - MDL_CLEJU3RE = 0.000000D+00 - MDL_CLEQT3RE = 0.000000D+00 - MDL_CGTIL = 0.000000D+00 - MDL_CWTIL = 0.000000D+00 - MDL_CHGTIL = 0.000000D+00 - MDL_CHWTIL = 0.000000D+00 - MDL_CHBTIL = 0.000000D+00 - MDL_CHWBTIL = 0.000000D+00 - MDL_CUGIM = 0.000000D+00 - MDL_CTGIM = 0.000000D+00 - MDL_CUWIM = 0.000000D+00 + MDL_CHWB = 0.000000D+00 MDL_CTWIM = 0.000000D+00 MDL_CUBIM = 0.000000D+00 MDL_CTBIM = 0.000000D+00 @@ -157,6 +152,7 @@ MDL_CDBIM = 0.000000D+00 MDL_CBBIM = 0.000000D+00 MDL_CUHIM = 0.000000D+00 + MDL_CGTIL = 0.000000D+00 MDL_CTHIM = 0.000000D+00 MDL_CDHIM = 0.000000D+00 MDL_CBHIM = 0.000000D+00 @@ -167,6 +163,7 @@ MDL_CJQTU1IM = 0.000000D+00 MDL_CJQTU8IM = 0.000000D+00 MDL_CJQBD1IM = 0.000000D+00 + MDL_CWTIL = 0.000000D+00 MDL_CJQBD8IM = 0.000000D+00 MDL_CJUJD1IM = 0.000000D+00 MDL_CJUJD8IM = 0.000000D+00 @@ -177,6 +174,7 @@ MDL_CJUQB1IM = 0.000000D+00 MDL_CJUQB8IM = 0.000000D+00 MDL_CQUJB1IM = 0.000000D+00 + MDL_CHGTIL = 0.000000D+00 MDL_CQUJB8IM = 0.000000D+00 MDL_CJTQD1IM = 
0.000000D+00 MDL_CJTQD8IM = 0.000000D+00 @@ -187,25 +185,27 @@ MDL_CEBIM = 0.000000D+00 MDL_CLEDJIM = 0.000000D+00 MDL_CLEBQIM = 0.000000D+00 + MDL_CHWTIL = 0.000000D+00 MDL_CLEJU1IM = 0.000000D+00 MDL_CLEJU3IM = 0.000000D+00 MDL_CLEQT1IM = 0.000000D+00 MDL_CLEQT3IM = 0.000000D+00 + MDL_CHBTIL = 0.000000D+00 + MDL_CHWBTIL = 0.000000D+00 + MDL_CUGIM = 0.000000D+00 + MDL_CTGIM = 0.000000D+00 + MDL_CUWIM = 0.000000D+00 MDL_LAMBDASMEFT = 1.000000D+03 MDL_MW = 8.038700D+01 MDL_GF = 1.166379D-05 AS = 1.179000D-01 MDL_LINEARPROPCORRECTIONS = 0.000000D+00 + MDL_YME = 5.110000D-04 + MDL_YMM = 1.056600D-01 + MDL_YMTAU = 1.777000D+00 MDL_YMDO = 4.670000D-03 MDL_YMUP = 2.160000D-03 MDL_YMS = 9.300000D-02 MDL_YMC = 1.270000D+00 MDL_YMB = 4.180000D+00 MDL_YMT = 1.727600D+02 - MDL_YME = 5.110000D-04 - MDL_YMM = 1.056600D-01 - MDL_YMTAU = 1.777000D+00 - MDL_WT = 1.330000D+00 - MDL_WZ = 2.495200D+00 - MDL_WW = 2.085000D+00 - MDL_WH = 4.070000D-03 diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index c09b8b4601..4faa62ba00 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -48,14 +48,14 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T save options auto_convert_model -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t INFO: load particles INFO: load vertices @@ -72,7 +72,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14107561111450195  +DEBUG: model prefixing takes 0.14309954643249512  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -87,7 +87,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.786 s +1 processes with 72 diagrams generated in 3.855 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -96,7 +96,7 @@ It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt INFO: 
Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 @@ -105,18 +105,18 @@ INFO: Processing color information for process: g g > t t~ t t~ @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h -FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.191 s +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. +Generated helas calls for 1 subprocesses (72 diagrams) in 0.194 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.326 s +ALOHA: aloha creates 5 routines in 0.329 s VVV5 VVV5 FFV1 @@ -126,17 +126,17 @@ ALOHA: aloha creates 5 routines in 0.326 s VVVV1 VVVV9 VVVV10 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h -INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h +INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
quit -real 0m5.220s -user 0m5.109s -sys 0m0.071s -Code generation completed in 5 seconds +real 0m5.412s +user 0m5.195s +sys 0m0.069s +Code generation completed in 6 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 5c926cec7c..fa543aa715 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.126 s +1 processes with 6 diagrams generated in 0.127 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -563,10 +563,10 @@ It has been validated for the last time with version: 3.5.2 INFO: initialize a new directory: CODEGEN_mad_susy_gg_t1t1 INFO: remove old information in CODEGEN_mad_susy_gg_t1t1 DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 @@ -581,42 +581,42 @@ INFO: Finding symmetric diagrams for subprocess group gg_t1t1x DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} 
[model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s -Wrote files for 16 helas calls in 0.083 s +Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s +Wrote files for 16 helas calls in 0.089 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.189 s +ALOHA: aloha creates 3 routines in 0.192 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.185 s +ALOHA: aloha creates 6 routines in 0.188 s VVV1 VSS1 VSS1 VSS1 VVSS1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h -INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h +INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -624,16 +624,16 @@ Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f Hunk #2 succeeded at 215 (offset 1 line). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/README Run "open index.html" to see more information about this process. quit -real 0m3.008s -user 0m2.691s -sys 0m0.313s -Code generation completed in 3 seconds +real 0m3.764s +user 0m2.748s +sys 0m0.306s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * @@ -654,9 +654,9 @@ Code generation completed in 3 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -684,9 +684,9 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/me5_configuration.txt b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/me5_configuration.txt index 4f5079f78a..68b4c46295 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/param_card.inc b/epochX/cudacpp/susy_gg_t1t1.mad/Source/param_card.inc index eb66efe001..6acb037f00 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/param_card.inc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/param_card.inc @@ -1,3 +1,37 @@ + MDL_WSD1 = 5.312788D+00 + MDL_WSU1 = 5.477195D+00 + MDL_WSD2 = 5.312788D+00 + MDL_WSU2 = 5.477195D+00 + MDL_WSD3 = 3.736276D+00 + MDL_WSU3 = 2.021596D+00 + MDL_WSL1 = 2.136822D-01 + MDL_WSN1 = 1.498816D-01 + MDL_WSL2 = 2.136822D-01 + MDL_WSN2 = 1.498816D-01 + MDL_WSL3 = 1.483273D-01 + MDL_WSN3 = 1.475190D-01 + MDL_WGO = 5.506754D+00 + MDL_WNEU2 = 2.077700D-02 + MDL_WCH1 = 1.704145D-02 + MDL_WNEU3 = -1.915985D+00 + MDL_WNEU4 = 2.585851D+00 + MDL_WCH2 = 2.486895D+00 + MDL_WSD4 = 2.858123D-01 + MDL_WSU4 = 1.152973D+00 + MDL_WSD5 = 2.858123D-01 + MDL_WSU5 = 1.152973D+00 + MDL_WSD6 = 8.015663D-01 + MDL_WSU6 = 7.373133D+00 + MDL_WSL4 = 2.161216D-01 + MDL_WSL5 = 2.161216D-01 + MDL_WSL6 = 2.699061D-01 + MDL_WZ = 2.411433D+00 + MDL_WW = 2.002822D+00 + MDL_WH01 = 1.986108D-03 + MDL_WH02 = 5.748014D-01 + MDL_WA0 = 6.321785D-01 + MDL_WH = 5.469628D-01 + MDL_WT = 1.561950D+00 MDL_RRD1X1 = 1.000000D+00 MDL_RRD2X2 = 1.000000D+00 MDL_RRD3X3 = 9.387379D-01 @@ -10,15 +44,6 @@ MDL_RMUH = 3.576810D+02 MDL_TB = 
9.748624D+00 MDL_MA2 = 1.664391D+05 - MDL_MB = 4.889917D+00 - MDL_MT = 1.750000D+02 - MDL_MTA = 1.777000D+00 - MDL_MZ = 9.118760D+01 - MDL_MW = 7.982901D+01 - MDL_MH01 = 1.108991D+02 - MDL_MH02 = 3.999601D+02 - MDL_MA0 = 3.995839D+02 - MDL_MH = 4.078790D+02 MDL_MSD1 = 5.684411D+02 MDL_MSU1 = 5.611190D+02 MDL_MSD3 = 5.130652D+02 @@ -34,12 +59,21 @@ MDL_MNEU3 = -3.637560D+02 MDL_MNEU4 = 3.817294D+02 MDL_MCH2 = 3.799393D+02 + MDL_MTA = 1.777000D+00 MDL_MSD4 = 5.452285D+02 MDL_MSU4 = 5.492593D+02 MDL_MSD6 = 5.437267D+02 MDL_MSU6 = 5.857858D+02 MDL_MSL4 = 1.441028D+02 MDL_MSL6 = 2.068678D+02 + MDL_MZ = 9.118760D+01 + MDL_MW = 7.982901D+01 + MDL_MH01 = 1.108991D+02 + MDL_MH02 = 3.999601D+02 + MDL_MA0 = 3.995839D+02 + MDL_MH = 4.078790D+02 + MDL_MB = 4.889917D+00 + MDL_MT = 1.750000D+02 MDL_RMD21X1 = 2.736847D+05 MDL_RMD23X3 = 2.702620D+05 MDL_RME21X1 = 1.863063D+04 @@ -47,10 +81,10 @@ MDL_RML21X1 = 3.815567D+04 MDL_RML23X3 = 3.782868D+04 MDL_RMX1 = 1.013965D+02 - MDL_RMX2 = 1.915042D+02 - MDL_RMX3 = 5.882630D+02 MDL_MHD2 = 3.233749D+04 MDL_MHU2 = -1.288001D+05 + MDL_RMX2 = 1.915042D+02 + MDL_RMX3 = 5.882630D+02 MDL_RMQ21X1 = 2.998367D+05 MDL_RMQ23X3 = 2.487654D+05 MDL_RMU21X1 = 2.803821D+05 @@ -112,37 +146,3 @@ MDL_RYD3X3 = 1.388402D-01 MDL_RYE3X3 = 1.008908D-01 MDL_RYU3X3 = 8.928445D-01 - MDL_WT = 1.561950D+00 - MDL_WZ = 2.411433D+00 - MDL_WW = 2.002822D+00 - MDL_WH01 = 1.986108D-03 - MDL_WH02 = 5.748014D-01 - MDL_WA0 = 6.321785D-01 - MDL_WH = 5.469628D-01 - MDL_WSD1 = 5.312788D+00 - MDL_WSU1 = 5.477195D+00 - MDL_WSD2 = 5.312788D+00 - MDL_WSU2 = 5.477195D+00 - MDL_WSD3 = 3.736276D+00 - MDL_WSU3 = 2.021596D+00 - MDL_WSL1 = 2.136822D-01 - MDL_WSN1 = 1.498816D-01 - MDL_WSL2 = 2.136822D-01 - MDL_WSN2 = 1.498816D-01 - MDL_WSL3 = 1.483273D-01 - MDL_WSN3 = 1.475190D-01 - MDL_WGO = 5.506754D+00 - MDL_WNEU2 = 2.077700D-02 - MDL_WCH1 = 1.704145D-02 - MDL_WNEU3 = -1.915985D+00 - MDL_WNEU4 = 2.585851D+00 - MDL_WCH2 = 2.486895D+00 - MDL_WSD4 = 2.858123D-01 - MDL_WSU4 = 1.152973D+00 
- MDL_WSD5 = 2.858123D-01 - MDL_WSU5 = 1.152973D+00 - MDL_WSD6 = 8.015663D-01 - MDL_WSU6 = 7.373133D+00 - MDL_WSL4 = 2.161216D-01 - MDL_WSL5 = 2.161216D-01 - MDL_WSL6 = 2.699061D-01 diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 493dd48022..87aa956997 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.133 s +1 processes with 6 diagrams generated in 0.128 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Load PLUGIN.CUDACPP_OUTPUT @@ -558,7 +558,7 @@ It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 @@ -567,10 +567,10 @@ INFO: Processing color information for process: g g > t1 t1~ @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 
'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/. +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/. 
Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 @@ -582,17 +582,17 @@ ALOHA: aloha creates 3 routines in 0.188 s VSS1 VSS1 VVSS1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h -INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h +INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
quit -real 0m1.361s -user 0m1.285s -sys 0m0.066s +real 0m1.370s +user 0m1.293s +sys 0m0.063s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 1bc4eab35c..07d653b183 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.122 s +1 processes with 3 diagrams generated in 0.123 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -563,10 +563,10 @@ It has been validated for the last time with version: 3.5.2 INFO: initialize a new directory: CODEGEN_mad_susy_gg_tt INFO: remove old information in CODEGEN_mad_susy_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 @@ -582,38 +582,38 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  Generated helas 
calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.076 s +Wrote files for 10 helas calls in 0.077 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.139 s +ALOHA: aloha creates 2 routines in 0.142 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.137 s +ALOHA: aloha creates 4 routines in 0.138 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h -INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h +INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. 
and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f Hunk #1 succeeded at 76 (offset 2 lines). Hunk #2 succeeded at 280 (offset 8 lines). @@ -621,15 +621,15 @@ Hunk #3 succeeded at 489 (offset 13 lines). 
patching file matrix1.f Hunk #2 succeeded at 227 (offset 13 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/README +/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/README Run "open index.html" to see more information about this process. quit -real 0m2.945s -user 0m2.632s -sys 0m0.271s +real 0m2.947s +user 0m2.622s +sys 0m0.285s Code generation completed in 3 seconds ************************************************************ * * @@ -651,9 +651,9 @@ Code generation completed in 3 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt @@ -681,9 +681,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/me5_configuration.txt b/epochX/cudacpp/susy_gg_tt.mad/Cards/me5_configuration.txt index 4f5079f78a..68b4c46295 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/param_card.inc b/epochX/cudacpp/susy_gg_tt.mad/Source/param_card.inc index eb66efe001..6acb037f00 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/param_card.inc +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/param_card.inc @@ -1,3 +1,37 @@ + MDL_WSD1 = 5.312788D+00 + MDL_WSU1 = 5.477195D+00 + MDL_WSD2 = 5.312788D+00 + MDL_WSU2 = 5.477195D+00 + MDL_WSD3 = 3.736276D+00 + MDL_WSU3 = 2.021596D+00 + MDL_WSL1 = 2.136822D-01 + MDL_WSN1 = 1.498816D-01 + MDL_WSL2 = 2.136822D-01 + MDL_WSN2 = 1.498816D-01 + MDL_WSL3 = 1.483273D-01 + MDL_WSN3 = 1.475190D-01 + MDL_WGO = 5.506754D+00 + MDL_WNEU2 = 2.077700D-02 + MDL_WCH1 = 1.704145D-02 + MDL_WNEU3 = -1.915985D+00 + MDL_WNEU4 = 2.585851D+00 + MDL_WCH2 = 2.486895D+00 + MDL_WSD4 = 2.858123D-01 + MDL_WSU4 = 1.152973D+00 + MDL_WSD5 = 2.858123D-01 + MDL_WSU5 = 1.152973D+00 + MDL_WSD6 = 8.015663D-01 + MDL_WSU6 = 7.373133D+00 + MDL_WSL4 = 2.161216D-01 + MDL_WSL5 = 2.161216D-01 + MDL_WSL6 = 2.699061D-01 + MDL_WZ = 2.411433D+00 + MDL_WW = 2.002822D+00 + MDL_WH01 = 1.986108D-03 + MDL_WH02 = 5.748014D-01 + MDL_WA0 = 6.321785D-01 + MDL_WH = 5.469628D-01 + MDL_WT = 1.561950D+00 MDL_RRD1X1 = 1.000000D+00 MDL_RRD2X2 = 1.000000D+00 MDL_RRD3X3 = 9.387379D-01 @@ -10,15 +44,6 @@ MDL_RMUH = 3.576810D+02 MDL_TB = 9.748624D+00 MDL_MA2 = 
1.664391D+05 - MDL_MB = 4.889917D+00 - MDL_MT = 1.750000D+02 - MDL_MTA = 1.777000D+00 - MDL_MZ = 9.118760D+01 - MDL_MW = 7.982901D+01 - MDL_MH01 = 1.108991D+02 - MDL_MH02 = 3.999601D+02 - MDL_MA0 = 3.995839D+02 - MDL_MH = 4.078790D+02 MDL_MSD1 = 5.684411D+02 MDL_MSU1 = 5.611190D+02 MDL_MSD3 = 5.130652D+02 @@ -34,12 +59,21 @@ MDL_MNEU3 = -3.637560D+02 MDL_MNEU4 = 3.817294D+02 MDL_MCH2 = 3.799393D+02 + MDL_MTA = 1.777000D+00 MDL_MSD4 = 5.452285D+02 MDL_MSU4 = 5.492593D+02 MDL_MSD6 = 5.437267D+02 MDL_MSU6 = 5.857858D+02 MDL_MSL4 = 1.441028D+02 MDL_MSL6 = 2.068678D+02 + MDL_MZ = 9.118760D+01 + MDL_MW = 7.982901D+01 + MDL_MH01 = 1.108991D+02 + MDL_MH02 = 3.999601D+02 + MDL_MA0 = 3.995839D+02 + MDL_MH = 4.078790D+02 + MDL_MB = 4.889917D+00 + MDL_MT = 1.750000D+02 MDL_RMD21X1 = 2.736847D+05 MDL_RMD23X3 = 2.702620D+05 MDL_RME21X1 = 1.863063D+04 @@ -47,10 +81,10 @@ MDL_RML21X1 = 3.815567D+04 MDL_RML23X3 = 3.782868D+04 MDL_RMX1 = 1.013965D+02 - MDL_RMX2 = 1.915042D+02 - MDL_RMX3 = 5.882630D+02 MDL_MHD2 = 3.233749D+04 MDL_MHU2 = -1.288001D+05 + MDL_RMX2 = 1.915042D+02 + MDL_RMX3 = 5.882630D+02 MDL_RMQ21X1 = 2.998367D+05 MDL_RMQ23X3 = 2.487654D+05 MDL_RMU21X1 = 2.803821D+05 @@ -112,37 +146,3 @@ MDL_RYD3X3 = 1.388402D-01 MDL_RYE3X3 = 1.008908D-01 MDL_RYU3X3 = 8.928445D-01 - MDL_WT = 1.561950D+00 - MDL_WZ = 2.411433D+00 - MDL_WW = 2.002822D+00 - MDL_WH01 = 1.986108D-03 - MDL_WH02 = 5.748014D-01 - MDL_WA0 = 6.321785D-01 - MDL_WH = 5.469628D-01 - MDL_WSD1 = 5.312788D+00 - MDL_WSU1 = 5.477195D+00 - MDL_WSD2 = 5.312788D+00 - MDL_WSU2 = 5.477195D+00 - MDL_WSD3 = 3.736276D+00 - MDL_WSU3 = 2.021596D+00 - MDL_WSL1 = 2.136822D-01 - MDL_WSN1 = 1.498816D-01 - MDL_WSL2 = 2.136822D-01 - MDL_WSN2 = 1.498816D-01 - MDL_WSL3 = 1.483273D-01 - MDL_WSN3 = 1.475190D-01 - MDL_WGO = 5.506754D+00 - MDL_WNEU2 = 2.077700D-02 - MDL_WCH1 = 1.704145D-02 - MDL_WNEU3 = -1.915985D+00 - MDL_WNEU4 = 2.585851D+00 - MDL_WCH2 = 2.486895D+00 - MDL_WSD4 = 2.858123D-01 - MDL_WSU4 = 1.152973D+00 - MDL_WSD5 = 
2.858123D-01 - MDL_WSU5 = 1.152973D+00 - MDL_WSD6 = 8.015663D-01 - MDL_WSU6 = 7.373133D+00 - MDL_WSL4 = 2.161216D-01 - MDL_WSL5 = 2.161216D-01 - MDL_WSL6 = 2.699061D-01 diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index 206640f843..d45b33dd9a 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -48,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt.mg +import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -56,7 +56,7 @@ set zerowidth_tchannel F import model MSSM_SLHA2 INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.9339261054992676  +DEBUG: model prefixing takes 0.9373023509979248  INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . INFO: Detect SLHA2 format. 
keeping restricted parameter in the param_card DEBUG: Simplifying conditional expressions  @@ -561,7 +561,7 @@ It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 @@ -570,30 +570,30 @@ INFO: Processing color information for process: g g > t t~ @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 
'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/. +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/. 
Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.150 s +ALOHA: aloha creates 2 routines in 0.141 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h -INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h +INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
quit -real 0m2.452s -user 0m2.316s -sys 0m0.082s +real 0m2.499s +user 0m2.333s +sys 0m0.068s Code generation completed in 3 seconds From 9f7bc605ada8962c2ebc226b86e9898b6c77a68d Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 16 Sep 2024 17:45:44 +0300 Subject: [PATCH 35/76] [amd] in gg_tt.mad and CODEGEN, fix cudacpp.mk to find the correct path to libamdhip64 #998 Also fix the LUMI setup to solve a second issue (move from 23.09 to 24.03) module load LUMI/24.03 partition/G module load cpeGNU/24.03 export CC="cc --cray-bypass-pkgconfig -craype-verbose" export CXX="CC --cray-bypass-pkgconfig -craype-verbose" export FC="ftn --cray-bypass-pkgconfig -craype-verbose -ffixed-line-length-132" (I checked that gg_tt.mad is regenerated as expected) --- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 4 ++-- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index c1eea3d70f..a610dc3ea8 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ 
$(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) 
$(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif From b4f1689889279e87d0116b2e9ade1cdb5da36d33 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 16 Sep 2024 19:39:55 +0200 Subject: [PATCH 36/76] [amd] regenerate all processes with fixes for libamdhip64 in cudacpp.mk --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 14 +++++------ .../ee_mumu.mad/SubProcesses/cudacpp.mk | 4 ++-- .../CODEGEN_cudacpp_ee_mumu_log.txt | 10 ++++---- .../ee_mumu.sa/SubProcesses/cudacpp.mk | 4 ++-- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 14 +++++------ .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 ++++---- .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 4 ++-- 
.../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 14 +++++------ .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 4 ++-- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 8 +++---- .../gg_ttg.mad/SubProcesses/cudacpp.mk | 4 ++-- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 8 +++---- .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 4 ++-- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 18 +++++++------- .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 4 ++-- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 +++++------ .../gg_ttgg.sa/SubProcesses/cudacpp.mk | 4 ++-- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 16 ++++++------- .../gg_ttggg.mad/SubProcesses/cudacpp.mk | 4 ++-- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 +++++------ .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 4 ++-- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 14 +++++------ .../gq_ttq.mad/SubProcesses/cudacpp.mk | 4 ++-- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 10 ++++---- .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 4 ++-- .../CODEGEN_mad_heft_gg_bb_log.txt | 10 ++++---- .../heft_gg_bb.mad/SubProcesses/cudacpp.mk | 4 ++-- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 13 ++++------ .../heft_gg_bb.sa/SubProcesses/cudacpp.mk | 4 ++-- .../CODEGEN_mad_nobm_pp_ttW_log.txt | 20 ++++++++-------- .../nobm_pp_ttW.mad/SubProcesses/cudacpp.mk | 4 ++-- .../CODEGEN_mad_pp_tt012j_log.txt | 24 +++++++++---------- .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 4 ++-- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 20 ++++++++-------- .../smeft_gg_tttt.mad/SubProcesses/cudacpp.mk | 4 ++-- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 14 +++++------ .../smeft_gg_tttt.sa/SubProcesses/cudacpp.mk | 4 ++-- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 16 ++++++------- .../susy_gg_t1t1.mad/SubProcesses/cudacpp.mk | 4 ++-- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 12 +++++----- .../susy_gg_t1t1.sa/SubProcesses/cudacpp.mk | 4 ++-- .../CODEGEN_mad_susy_gg_tt_log.txt | 14 +++++------ .../susy_gg_tt.mad/SubProcesses/cudacpp.mk | 4 ++-- .../CODEGEN_cudacpp_susy_gg_tt_log.txt 
| 15 +++++------- .../susy_gg_tt.sa/SubProcesses/cudacpp.mk | 4 ++-- 45 files changed, 201 insertions(+), 209 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index ed08e994bb..c4af716d3e 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -57,7 +57,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005808353424072266  +DEBUG: model prefixing takes 0.0058557987213134766  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -182,19 +182,19 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.075 s +Wrote files for 8 helas calls in 0.071 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.211 s +ALOHA: aloha creates 3 routines in 0.208 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.262 s +ALOHA: aloha creates 7 routines in 0.264 s FFV1 FFV1 FFV2 @@ -234,9 +234,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.190s -user 0m1.811s -sys 0m0.293s +real 0m2.098s +user 0m1.838s +sys 0m0.249s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) 
-print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 90b16d55ff..2314b032c5 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -57,7 +57,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00577545166015625  +DEBUG: model prefixing takes 0.00559544563293457  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,7 +177,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.274 s +ALOHA: aloha creates 4 routines in 0.277 s FFV1 FFV1 FFV2 @@ -196,7 +196,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
quit -real 0m0.708s -user 0m0.606s -sys 0m0.059s +real 0m0.668s +user 0m0.612s +sys 0m0.051s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ 
$(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 5883f45027..49092b4162 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005657672882080078  +DEBUG: model prefixing takes 0.00574946403503418  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.008 s +1 processes with 3 diagrams generated in 0.009 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -188,11 +188,11 @@ Wrote files for 10 helas calls in 0.074 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.149 s +ALOHA: aloha creates 2 routines in 0.150 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.138 s +ALOHA: aloha creates 4 routines in 0.137 s VVV1 FFV1 FFV1 @@ -228,9 +228,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.927s -user 0m1.669s -sys 0m0.259s +real 0m1.968s +user 0m1.645s +sys 0m0.279s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 73ac7d8cdc..8ca09600ad 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005669593811035156  +DEBUG: model prefixing takes 0.0057675838470458984  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -176,7 +176,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.149 s +ALOHA: aloha creates 2 routines in 0.150 s VVV1 FFV1 FFV1 @@ -191,7 +191,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
quit -real 0m0.550s +real 0m0.558s user 0m0.483s -sys 0m0.062s +sys 0m0.061s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) 
$(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index b0273fb24d..7e07d9fbf2 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056915283203125  +DEBUG: model prefixing takes 0.005762577056884766  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -212,14 +212,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.337 s +ALOHA: aloha creates 5 routines in 0.335 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.322 s +ALOHA: aloha creates 10 routines in 0.320 s VVV1 VVV1 FFV1 @@ -267,10 +267,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.705s -user 0m2.357s -sys 0m0.302s -Code generation completed in 3 seconds +real 0m2.668s +user 0m2.323s +sys 0m0.316s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl 
$(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 647dfb69b1..ece3a1cec9 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005728483200073242  +DEBUG: model prefixing takes 0.0057582855224609375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -191,7 +191,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.339 s +ALOHA: aloha creates 5 routines in 0.334 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -239,9 +239,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.510s +real 0m2.526s user 0m2.214s -sys 0m0.285s +sys 0m0.286s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) 
-print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 70c71a74a1..e2ccc79a65 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005589723587036133  +DEBUG: model prefixing takes 0.00581049919128418  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -199,7 +199,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
quit -real 0m0.803s -user 0m0.748s -sys 0m0.049s +real 0m0.837s +user 0m0.751s +sys 0m0.055s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ 
$(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index fd4150a8d6..5d00f6a26e 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005757808685302734  +DEBUG: model prefixing takes 0.005704164505004883  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.166 s +1 processes with 123 diagrams generated in 0.163 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -182,8 +182,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxgg DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 
70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.449 s -Wrote files for 222 helas calls in 0.693 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.445 s +Wrote files for 222 helas calls in 0.688 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -191,14 +191,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.350 s +ALOHA: aloha creates 5 routines in 0.342 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.329 s +ALOHA: aloha creates 10 routines in 0.322 s VVV1 VVV1 FFV1 @@ -242,9 +242,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.932s -user 0m3.633s -sys 0m0.291s +real 0m3.947s +user 0m3.631s +sys 0m0.278s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) 
-print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 72dce09a64..f9c6193903 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005761146545410156  +DEBUG: model prefixing takes 0.0057904720306396484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.165 s +1 processes with 123 diagrams generated in 0.163 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -172,14 +172,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.438 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.436 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.332 s +ALOHA: aloha creates 5 routines in 0.329 s VVV1 VVV1 FFV1 @@ -202,7 +202,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m1.489s -user 0m1.409s -sys 0m0.072s +real 0m1.545s +user 0m1.414s +sys 0m0.054s Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ 
$(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 0f10271b3c..2edaf9a3a4 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057866573333740234  +DEBUG: model prefixing takes 0.005669116973876953  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.951 s +1 processes with 1240 diagrams generated in 1.938 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -184,8 +184,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxggg DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 
182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 
417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 
652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 
889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1547]  DEBUG: 
diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 
218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 
418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 
618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 
818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.778 s -Wrote files for 2281 helas calls in 19.046 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.779 s +Wrote files for 2281 helas calls in 18.956 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -200,7 +200,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.325 s +ALOHA: 
aloha creates 10 routines in 0.333 s VVV1 VVV1 FFV1 @@ -244,9 +244,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m33.700s -user 0m33.094s -sys 0m0.485s +real 0m33.677s +user 0m33.088s +sys 0m0.436s Code generation completed in 34 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq 
($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 4a3ba8518e..6b3f5ac37b 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005571842193603516  +DEBUG: model prefixing takes 0.00566554069519043  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.935 s +1 processes with 1240 diagrams generated in 1.997 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -172,14 +172,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.885 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.792 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.364 s +ALOHA: aloha creates 5 routines in 0.358 s VVV1 VVV1 FFV1 @@ -202,7 +202,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m13.596s -user 0m13.408s -sys 0m0.113s +real 0m13.498s +user 0m13.348s +sys 0m0.099s Code generation completed in 14 seconds diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ 
$(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index d3e9530428..61ff93de59 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056192874908447266  +DEBUG: model prefixing takes 0.005731821060180664  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -216,17 +216,17 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxux DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1548]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s Wrote files for 32 helas calls in 0.168 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.147 s +ALOHA: aloha creates 2 routines in 0.150 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.135 s +ALOHA: aloha creates 4 routines in 0.137 s FFV1 FFV1 FFV1 @@ -272,9 +272,9 @@ Type "launch" to generate events from this 
process, or see Run "open index.html" to see more information about this process. quit -real 0m2.234s -user 0m1.920s -sys 0m0.312s +real 0m2.244s +user 0m1.937s +sys 0m0.305s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) 
$(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index db04f926c4..0100caee86 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005569934844970703  +DEBUG: model prefixing takes 0.005804538726806641  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -165,7 +165,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.083 s +8 processes with 40 diagrams generated in 0.081 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -224,7 +224,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
quit -real 0m0.778s -user 0m0.610s -sys 0m0.054s +real 0m0.662s +user 0m0.608s +sys 0m0.050s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ 
$(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 6a7dd15b25..33cc2a6d99 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -162,7 +162,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.274 s +ALOHA: aloha creates 4 routines in 0.273 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 @@ -206,10 +206,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.197s -user 0m1.912s -sys 0m0.271s -Code generation completed in 2 seconds +real 0m3.048s +user 0m1.889s +sys 0m0.292s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) 
$(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index e629afa7b0..ebc8a90b6f 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -57,11 +57,6 @@ set auto_convert_model T save options auto_convert_model save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model heft -INFO: load particles -INFO: load vertices -WARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  -WARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  -DEBUG: model prefixing takes 0.006257057189941406  INFO: Restrict model heft with file models/heft/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -155,7 +150,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.269 s +ALOHA: aloha creates 4 routines in 0.270 s VVS3 VVV1 FFV1 @@ -172,7 +167,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. quit -real 0m0.678s -user 0m0.624s -sys 0m0.044s +real 0m0.812s +user 0m0.602s +sys 0m0.046s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o 
$(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt index 5a19d6bcfd..3a41d39d53 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F import model sm-no_b_mass INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057599544525146484  +DEBUG: model prefixing takes 0.005716800689697266  INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ INFO: Process u~ d > t t~ w- added to mirror process d u~ > t t~ w- INFO: Process c~ s > t t~ w- added to mirror process s c~ > t t~ w- INFO: Process d~ u > t t~ w+ added to mirror process u d~ > t t~ w+ INFO: Process s~ c > t t~ w+ added to mirror process c s~ > t t~ w+ -4 processes with 8 diagrams generated in 0.111 s +4 processes with 8 diagrams generated in 0.112 s Total: 4 processes with 8 diagrams add process p p > t t~ w j @1 INFO: Checking for minimal orders which gives processes. @@ -222,7 +222,7 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~ INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g -12 processes with 144 diagrams generated in 0.666 s +12 processes with 144 diagrams generated in 0.665 s Total: 16 processes with 152 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -353,19 +353,19 @@ INFO: Finding symmetric diagrams for subprocess group dux_ttxwm DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548]  -Generated helas calls for 8 subprocesses (76 diagrams) in 0.206 s -Wrote files for 212 helas calls in 0.845 s +Generated helas calls for 8 subprocesses (76 diagrams) in 0.209 s +Wrote files for 212 helas calls in 0.839 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 3 routines in 
0.209 s +ALOHA: aloha creates 3 routines in 0.207 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 6 routines in 0.206 s +ALOHA: aloha creates 6 routines in 0.204 s FFV1 FFV1 FFV1 @@ -461,9 +461,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m5.337s -user 0m4.211s -sys 0m0.535s +real 0m4.738s +user 0m4.183s +sys 0m0.543s Code generation completed in 5 seconds ************************************************************ * * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 
+975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index bb75e72b2c..2d56b04fff 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00578761100769043  +DEBUG: model prefixing takes 0.005767345428466797  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -167,7 +167,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.030 s +5 processes with 7 diagrams generated in 0.031 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. 
@@ -207,7 +207,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.141 s +13 processes with 76 diagrams generated in 0.142 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -373,7 +373,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.915 s +65 processes with 1119 diagrams generated in 1.881 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -691,8 +691,8 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttx DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1548]  -Generated helas calls for 18 subprocesses (372 diagrams) in 1.340 s -Wrote files for 810 helas calls in 2.854 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.320 s +Wrote files for 810 helas calls in 2.822 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -700,14 +700,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.349 s +ALOHA: aloha creates 5 
routines in 0.345 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.330 s +ALOHA: aloha creates 10 routines in 0.323 s VVV1 VVV1 FFV1 @@ -885,10 +885,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m11.928s -user 0m9.873s -sys 0m0.901s -Code generation completed in 12 seconds +real 0m10.728s +user 0m9.718s +sys 0m0.942s +Code generation completed in 11 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) 
-lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index b14bad49de..f51c5375c2 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -72,7 +72,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14215660095214844  +DEBUG: model prefixing takes 0.14151597023010254  INFO: Change particles name to pass to MG5 convention Defined 
multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -87,7 +87,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.819 s +1 processes with 72 diagrams generated in 3.797 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -119,8 +119,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxttx DEBUG: len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 
28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (72 diagrams) in 0.195 s -Wrote files for 119 helas calls in 0.400 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s +Wrote files for 119 helas calls in 0.397 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -128,14 +128,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.330 s +ALOHA: aloha creates 5 routines in 0.326 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.342 s +ALOHA: aloha creates 10 routines in 0.336 s VVV5 VVV5 FFV1 @@ -176,10 +176,10 @@ Type "launch" to generate events from this 
process, or see Run "open index.html" to see more information about this process. quit -real 0m7.523s -user 0m7.077s -sys 0m0.280s -Code generation completed in 8 seconds +real 0m7.305s +user 0m7.006s +sys 0m0.283s +Code generation completed in 7 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link 
fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index 4faa62ba00..536b88812d 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -72,7 +72,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14309954643249512  +DEBUG: model prefixing takes 0.14238429069519043  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -87,7 +87,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.855 s +1 processes with 72 diagrams generated in 3.783 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -109,14 +109,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.194 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.200 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.329 s +ALOHA: aloha creates 5 routines in 0.327 s VVV5 VVV5 FFV1 @@ -136,7 +136,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. quit -real 0m5.412s -user 0m5.195s +real 0m5.220s +user 0m5.128s sys 0m0.069s -Code generation completed in 6 seconds +Code generation completed in 5 seconds diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o 
$(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index fa543aa715..3c66cbb0ec 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -581,19 +581,19 @@ INFO: Finding symmetric diagrams for subprocess group gg_t1t1x DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s -Wrote files for 16 helas calls in 0.089 s 
+Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s +Wrote files for 16 helas calls in 0.084 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.192 s +ALOHA: aloha creates 3 routines in 0.188 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.188 s +ALOHA: aloha creates 6 routines in 0.187 s VVV1 VSS1 VSS1 @@ -630,10 +630,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.764s -user 0m2.748s -sys 0m0.306s -Code generation completed in 4 seconds +real 0m3.115s +user 0m2.723s +sys 0m0.305s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib 
-lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 87aa956997..8c46a18101 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.128 s +1 processes with 6 diagrams generated in 0.126 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Load PLUGIN.CUDACPP_OUTPUT @@ -576,7 +576,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.188 s +ALOHA: aloha creates 3 routines in 0.190 s VVV1 VSS1 VSS1 @@ -592,7 +592,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. quit -real 0m1.370s -user 0m1.293s -sys 0m0.063s -Code generation completed in 1 seconds +real 0m1.453s +user 0m1.294s +sys 0m0.055s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib 
-lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 07d653b183..fafdc9960e 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.123 s +1 processes with 3 diagrams generated in 0.120 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -582,16 +582,16 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.077 s +Wrote files for 10 helas calls in 0.076 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.140 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.138 s +ALOHA: aloha creates 4 routines in 0.137 s VVV1 FFV1 FFV1 @@ -627,9 +627,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.947s -user 0m2.622s -sys 0m0.285s +real 0m2.929s +user 0m2.609s +sys 0m0.281s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname 
$(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index d45b33dd9a..b4451876ff 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -54,9 +54,6 @@ set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F import model MSSM_SLHA2 -INFO: load particles -INFO: load vertices -DEBUG: model prefixing takes 0.9373023509979248  INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . INFO: Detect SLHA2 format. keeping restricted parameter in the param_card DEBUG: Simplifying conditional expressions  @@ -552,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.112 s +1 processes with 3 diagrams generated in 0.121 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -578,7 +575,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.142 s VVV1 FFV1 FFV1 @@ -593,7 +590,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. quit -real 0m2.499s -user 0m2.333s -sys 0m0.068s -Code generation completed in 3 seconds +real 0m1.335s +user 0m1.264s +sys 0m0.052s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk index 47e2f4233a..5ffb286fef 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk @@ -874,7 +874,7 @@ endif $(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) 
-print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) endif @@ -975,7 +975,7 @@ else # link only runTest_$(GPUSUFFIX).o (new: in the past, this was linking both $(gpu_testmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(GTESTLIBS) ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 + $(FC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lstdc++ -lpthread -L$(shell cd -L $(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../..; pwd)/lib -lamdhip64 else $(GPUCC) -o $@ $(gpu_objects_lib) $(gpu_objects_exe) -ldl $(LIBFLAGS) -lcuda endif From 212a9e0df53a1311a20c54991d236249977f2748 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 08:19:35 +0300 Subject: [PATCH 37/76] [amd] in tput/allTees.sh clarify that -cpponly and -nocuda exist while -hip is no longer available --- epochX/cudacpp/tput/allTees.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/epochX/cudacpp/tput/allTees.sh b/epochX/cudacpp/tput/allTees.sh index f66933c116..b99521a3f8 100755 --- a/epochX/cudacpp/tput/allTees.sh +++ b/epochX/cudacpp/tput/allTees.sh @@ -42,10 +42,10 @@ while [ "$1" != "" ]; do # Skip -makeclean (e.g. 
for brand new generated/downloaded code) makeclean= shift - elif [ "$1" == "-hip" ]; then - if [ "${bblds}" != "" ] && [ "${bblds}" != "$1" ]; then echo "ERROR! Incompatible option $1: backend builds are already defined as '$bblds'"; usage; fi - bblds="$1" - shift + ###elif [ "$1" == "-hip" ]; then + ### if [ "${bblds}" != "" ] && [ "${bblds}" != "$1" ]; then echo "ERROR! Incompatible option $1: backend builds are already defined as '$bblds'"; usage; fi + ### bblds="$1" + ### shift elif [ "$1" == "-nocuda" ]; then if [ "${bblds}" != "" ] && [ "${bblds}" != "$1" ]; then echo "ERROR! Incompatible option $1: backend builds are already defined as '$bblds'"; usage; fi bblds="$1" @@ -61,7 +61,7 @@ while [ "$1" != "" ]; do bsm=$1 shift else - echo "Usage: $0 [-short] [-e] [-sa] [-makeonly] [-nomakeclean] [-nocuda] [-bsmonly|-nobsm]" + echo "Usage: $0 [-short] [-e] [-sa] [-makeonly] [-nomakeclean] [-nocuda|-cpponly] [-bsmonly|-nobsm]" exit 1 fi done From 1358fcba43c65bd28a51581fcfa05592b46366ba Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 08:33:03 +0300 Subject: [PATCH 38/76] [amd] in tput/allTees.sh, on second thought add back -hip, but make this identical to -nocuda for the moment (common random) --- epochX/cudacpp/tput/allTees.sh | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/epochX/cudacpp/tput/allTees.sh b/epochX/cudacpp/tput/allTees.sh index b99521a3f8..fb60a214b0 100755 --- a/epochX/cudacpp/tput/allTees.sh +++ b/epochX/cudacpp/tput/allTees.sh @@ -42,10 +42,10 @@ while [ "$1" != "" ]; do # Skip -makeclean (e.g. for brand new generated/downloaded code) makeclean= shift - ###elif [ "$1" == "-hip" ]; then - ### if [ "${bblds}" != "" ] && [ "${bblds}" != "$1" ]; then echo "ERROR! Incompatible option $1: backend builds are already defined as '$bblds'"; usage; fi - ### bblds="$1" - ### shift + elif [ "$1" == "-hip" ]; then + if [ "${bblds}" != "" ] && [ "${bblds}" != "$1" ]; then echo "ERROR! 
Incompatible option $1: backend builds are already defined as '$bblds'"; usage; fi + bblds="$1" + shift elif [ "$1" == "-nocuda" ]; then if [ "${bblds}" != "" ] && [ "${bblds}" != "$1" ]; then echo "ERROR! Incompatible option $1: backend builds are already defined as '$bblds'"; usage; fi bblds="$1" @@ -61,7 +61,7 @@ while [ "$1" != "" ]; do bsm=$1 shift else - echo "Usage: $0 [-short] [-e] [-sa] [-makeonly] [-nomakeclean] [-nocuda|-cpponly] [-bsmonly|-nobsm]" + echo "Usage: $0 [-short] [-e] [-sa] [-makeonly] [-nomakeclean] [-hip|-nocuda|-cpponly] [-bsmonly|-nobsm]" exit 1 fi done @@ -75,13 +75,13 @@ elif [ "$bblds" == "-cpponly" ]; then # Random numbers use common instead of curand rndhst=-common opts+=" -cpponly" -###elif [ "$bblds" == "-hip" ]; then -### #### Random numbers use hiprand instead of curand? -### ###rndhst=-hirhst -### # See https://github.com/ROCm/hipRAND/issues/76 -### # Random numbers use common (not hiprand) instead of curand? -### rndhst=-common -### opts+=" -nocuda" +elif [ "$bblds" == "-hip" ]; then # NB: currently (Sep 2024) this is identical to -nocuda + #### Random numbers use hiprand instead of curand? 
+ #### This needs ROCm 6.2 (see https://github.com/ROCm/hipRAND/issues/76) + ###rndhst=-hirhst + # Random numbers use common (not hiprand) instead of curand + rndhst=-common + opts+=" -nocuda" fi # This is a script to launch in one go all tests for the (4 or) 5 main processes in this repository From 5ecc6991dcb32128bb11842406c883a22c458ed3 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 09:28:22 +0300 Subject: [PATCH 39/76] [amd] rerun 96 tput tests on LUMI - many issues at build time and at runtime (1) Build tests on login node (~2h) ./tput/allTees.sh -makeonly STARTED AT Mon 16 Sep 2024 08:41:05 PM EEST ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean -makeonly ENDED(1) AT Mon 16 Sep 2024 09:17:11 PM EEST [Status=1] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean -makeonly ENDED(2) AT Mon 16 Sep 2024 09:30:48 PM EEST [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean -makeonly ENDED(3) AT Mon 16 Sep 2024 09:33:43 PM EEST [Status=1] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst -makeonly ENDED(4) AT Mon 16 Sep 2024 09:33:51 PM EEST [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst -makeonly ENDED(5) AT Mon 16 Sep 2024 09:34:00 PM EEST [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common -makeonly ENDED(6) AT Mon 16 Sep 2024 09:34:09 PM EEST [Status=0] ./tput/teeThroughputX.sh -mix -hrd -makej -susyggtt -susyggt1t1 -smeftggtttt -heftggbb -makeclean -makeonly ENDED(7) AT Mon 16 Sep 2024 09:59:55 PM EEST [Status=0] (2) Step 2 - run tests on worker nodes (~1h) ./tput/allTees.sh -hip STARTED AT Tue 17 Sep 2024 08:35:08 AM EEST ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean -nocuda ENDED(1) AT Tue 17 Sep 2024 09:08:52 AM EEST [Status=2] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg 
-inlonly -makeclean -nocuda ENDED(2) AT Tue 17 Sep 2024 09:12:28 AM EEST [Status=2] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean -nocuda ENDED(3) AT Tue 17 Sep 2024 09:18:56 AM EEST [Status=2] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst -nocuda ENDED(4) AT Tue 17 Sep 2024 09:19:30 AM EEST [Status=2] SKIP './tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common -nocuda' ENDED(5) AT Tue 17 Sep 2024 09:19:30 AM EEST [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common -nocuda ENDED(6) AT Tue 17 Sep 2024 09:20:03 AM EEST [Status=2] ./tput/teeThroughputX.sh -mix -hrd -makej -susyggtt -susyggt1t1 -smeftggtttt -heftggbb -makeclean -nocuda ENDED(7) AT Tue 17 Sep 2024 09:26:15 AM EEST [Status=2] --- .../log_eemumu_mad_d_inl0_hrd0.txt | 248 ++----------- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 265 ++------------ .../log_eemumu_mad_d_inl0_hrd0_common.txt | 248 ++----------- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 251 ++------------ .../log_eemumu_mad_d_inl0_hrd1.txt | 248 ++----------- .../log_eemumu_mad_d_inl1_hrd0.txt | 248 ++----------- .../log_eemumu_mad_d_inl1_hrd1.txt | 248 ++----------- .../log_eemumu_mad_f_inl0_hrd0.txt | 248 ++----------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 265 ++------------ .../log_eemumu_mad_f_inl0_hrd0_common.txt | 248 ++----------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 251 ++------------ .../log_eemumu_mad_f_inl0_hrd1.txt | 248 ++----------- .../log_eemumu_mad_f_inl1_hrd0.txt | 248 ++----------- .../log_eemumu_mad_f_inl1_hrd1.txt | 248 ++----------- .../log_eemumu_mad_m_inl0_hrd0.txt | 248 ++----------- .../log_eemumu_mad_m_inl0_hrd1.txt | 248 ++----------- .../log_ggtt_mad_d_inl0_hrd0.txt | 248 ++----------- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 265 ++------------ .../log_ggtt_mad_d_inl0_hrd0_common.txt | 248 ++----------- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 251 ++------------ .../log_ggtt_mad_d_inl0_hrd1.txt | 248 
++----------- .../log_ggtt_mad_d_inl1_hrd0.txt | 248 ++----------- .../log_ggtt_mad_d_inl1_hrd1.txt | 248 ++----------- .../log_ggtt_mad_f_inl0_hrd0.txt | 248 ++----------- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 265 ++------------ .../log_ggtt_mad_f_inl0_hrd0_common.txt | 248 ++----------- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 251 ++------------ .../log_ggtt_mad_f_inl0_hrd1.txt | 248 ++----------- .../log_ggtt_mad_f_inl1_hrd0.txt | 248 ++----------- .../log_ggtt_mad_f_inl1_hrd1.txt | 248 ++----------- .../log_ggtt_mad_m_inl0_hrd0.txt | 248 ++----------- .../log_ggtt_mad_m_inl0_hrd1.txt | 248 ++----------- .../log_ggttg_mad_d_inl0_hrd0.txt | 269 ++------------ .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 289 +++------------- .../log_ggttg_mad_d_inl0_hrd1.txt | 269 ++------------ .../log_ggttg_mad_f_inl0_hrd0.txt | 269 ++------------ .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 289 +++------------- .../log_ggttg_mad_f_inl0_hrd1.txt | 269 ++------------ .../log_ggttg_mad_m_inl0_hrd0.txt | 269 ++------------ .../log_ggttg_mad_m_inl0_hrd1.txt | 269 ++------------ .../log_ggttgg_mad_d_inl0_hrd0.txt | 269 ++------------ .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 289 +++------------- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 269 ++------------ .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 274 +++------------ .../log_ggttgg_mad_d_inl0_hrd1.txt | 269 ++------------ .../log_ggttgg_mad_d_inl1_hrd0.txt | 269 ++------------ .../log_ggttgg_mad_d_inl1_hrd1.txt | 269 ++------------ .../log_ggttgg_mad_f_inl0_hrd0.txt | 269 ++------------ .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 289 +++------------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 269 ++------------ .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 274 +++------------ .../log_ggttgg_mad_f_inl0_hrd1.txt | 269 ++------------ .../log_ggttgg_mad_f_inl1_hrd0.txt | 269 ++------------ .../log_ggttgg_mad_f_inl1_hrd1.txt | 269 ++------------ .../log_ggttgg_mad_m_inl0_hrd0.txt | 267 ++------------ 
.../log_ggttgg_mad_m_inl0_hrd1.txt | 267 ++------------ .../log_ggttggg_mad_d_inl0_hrd0.txt | 237 ++++--------- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 261 ++++---------- .../log_ggttggg_mad_d_inl0_hrd1.txt | 237 ++++--------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 251 ++++---------- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 275 +++++---------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 251 ++++---------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 233 ++++--------- .../log_ggttggg_mad_m_inl0_hrd1.txt | 233 ++++--------- .../log_gqttq_mad_d_inl0_hrd0.txt | 293 ++++++---------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 313 ++++++----------- .../log_gqttq_mad_d_inl0_hrd1.txt | 293 ++++++---------- .../log_gqttq_mad_f_inl0_hrd0.txt | 307 +++++++--------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 327 +++++++----------- .../log_gqttq_mad_f_inl0_hrd1.txt | 307 +++++++--------- .../log_gqttq_mad_m_inl0_hrd0.txt | 293 ++++++---------- .../log_gqttq_mad_m_inl0_hrd1.txt | 293 ++++++---------- .../log_heftggbb_mad_d_inl0_hrd0.txt | 248 ++----------- .../log_heftggbb_mad_d_inl0_hrd1.txt | 248 ++----------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 254 ++------------ .../log_heftggbb_mad_f_inl0_hrd1.txt | 258 ++------------ .../log_heftggbb_mad_m_inl0_hrd0.txt | 256 ++------------ .../log_heftggbb_mad_m_inl0_hrd1.txt | 256 ++------------ .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 269 ++------------ .../log_smeftggtttt_mad_d_inl0_hrd1.txt | 269 ++------------ .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 269 ++------------ .../log_smeftggtttt_mad_f_inl0_hrd1.txt | 269 ++------------ .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 269 ++------------ .../log_smeftggtttt_mad_m_inl0_hrd1.txt | 269 ++------------ .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 246 ++----------- .../log_susyggt1t1_mad_d_inl0_hrd1.txt | 246 ++----------- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 248 ++----------- .../log_susyggt1t1_mad_f_inl0_hrd1.txt | 248 ++----------- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 246 
++----------- .../log_susyggt1t1_mad_m_inl0_hrd1.txt | 246 ++----------- .../log_susyggtt_mad_d_inl0_hrd0.txt | 248 ++----------- .../log_susyggtt_mad_d_inl0_hrd1.txt | 248 ++----------- .../log_susyggtt_mad_f_inl0_hrd0.txt | 248 ++----------- .../log_susyggtt_mad_f_inl0_hrd1.txt | 248 ++----------- .../log_susyggtt_mad_m_inl0_hrd0.txt | 246 ++----------- .../log_susyggtt_mad_m_inl0_hrd1.txt | 246 ++----------- 96 files changed, 4040 insertions(+), 21020 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 1292ed24b8..e7b206ce0f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:02:59 -DATE: 2024-09-15_11:08:03 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.330379e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.527996e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.788543e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.812638 sec -INFO: No Floating Point Exceptions have been reported - 2,711,766,628 cycles # 2.867 GHz - 4,239,903,132 instructions # 1.56 insn per cycle - 1.138564764 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.032481e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.205909e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.205909e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.541164 sec -INFO: No Floating Point Exceptions have been reported - 19,214,248,144 cycles # 2.933 GHz 
- 46,179,436,349 instructions # 2.40 insn per cycle - 6.552095575 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] 
(23) = ( 1.566602e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.052859e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.052859e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.470271 sec -INFO: No Floating Point Exceptions have been reported - 13,145,357,361 cycles # 2.934 GHz - 31,720,883,797 instructions # 2.41 insn per cycle - 4.481479023 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.961947e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.743984e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.743984e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.647893 sec -INFO: No Floating Point Exceptions have been reported - 10,212,054,728 cycles # 2.792 GHz - 19,686,910,587 instructions # 1.93 insn per cycle - 3.658422867 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.012892e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.837375e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.837375e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.568887 sec -INFO: No Floating Point Exceptions have been reported - 
10,042,390,879 cycles # 2.806 GHz - 19,342,891,969 instructions # 1.93 insn per cycle - 3.579550757 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 838,411,756 cycles:u # 0.390 GHz (74.20%) + 2,596,463 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.07%) + 6,168,888 stalled-cycles-backend:u # 0.74% backend cycles idle (75.24%) + 1,307,882,314 instructions:u # 1.56 insn per cycle + # 0.00 stalled cycles per insn (75.11%) + 2.974088628 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.687611e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.233598e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.233598e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.179012 sec -INFO: No Floating Point Exceptions have been reported - 8,766,087,418 cycles # 2.093 GHz - 15,826,503,490 instructions # 1.81 insn per cycle - 4.190350116 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828039868165208E-002 + File "", line 1 + me1=; me2=1.2828039868165208E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 656f6e2f98..7e766b1c09 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,77 +1,45 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:16:58 -DATE: 2024-09-15_11:45:05 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.206256e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.682542e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.682542e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.457281 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 7,617,652,543 cycles # 2.830 GHz - 12,995,599,451 instructions # 1.71 insn per cycle - 2.778749807 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 2,173,251,407 cycles:u # 2.752 GHz (74.86%) + 11,405,504 stalled-cycles-frontend:u # 0.52% frontend cycles idle (75.61%) + 550,647,183 stalled-cycles-backend:u # 25.34% backend cycles idle (75.60%) + 2,544,263,226 instructions:u # 1.17 insn per cycle + # 0.22 stalled cycles per insn (74.64%) + 0.825203734 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -79,184 +47,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.578808e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.114252e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.114252e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 7.297311 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 20,775,260,342 cycles # 2.853 GHz - 46,581,102,942 instructions # 2.24 insn per cycle - 7.320240357 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.428539e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.841895e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.841895e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.145544 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 14,656,531,148 cycles # 2.850 GHz - 32,719,868,481 instructions # 2.23 insn per cycle - 5.168221991 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.782197e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.438081e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.438081e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.265326 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 11,667,988,214 cycles # 2.728 GHz - 21,208,810,330 instructions # 1.82 insn per cycle - 4.287433901 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.824257e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.501728e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.501728e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.177936 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 11,439,960,592 cycles # 2.728 GHz - 20,869,154,642 instructions # 1.82 insn per cycle - 4.198165961 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.555883e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.022583e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.022583e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.778023 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,270,194,102 cycles # 2.143 GHz - 17,125,695,085 instructions # 1.67 insn per cycle - 4.797944534 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828039868165208E-002 + File "", line 1 + me1=; me2=1.2828039868165208E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index c883b5b3b2..09f746db11 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,198 +1,41 @@ -Building in 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:19:34 -DATE: 2024-09-15_11:57:26 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.026987e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.683583e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.857936e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.462390 sec -INFO: No Floating Point Exceptions have been reported - 4,897,119,539 cycles # 2.892 GHz - 7,502,819,293 instructions # 1.53 insn per cycle - 1.751403177 seconds time elapsed 
-runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.029304e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.201451e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.201451e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 7.039422 sec -INFO: No Floating Point Exceptions have been reported - 20,621,489,655 cycles # 2.926 GHz - 46,653,049,885 instructions # 2.26 insn per cycle - 7.049549267 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.563131e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.046448e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.046448e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.950021 sec -INFO: No Floating Point Exceptions have been reported - 14,503,935,115 cycles # 2.925 GHz - 32,091,166,775 instructions # 2.21 insn per cycle - 4.960008276 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.961733e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.745410e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.745410e+06 ) sec^-1 
-MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.125348 sec -INFO: No Floating Point Exceptions have been reported - 11,625,768,108 cycles # 2.812 GHz - 19,969,403,537 instructions # 1.72 insn per cycle - 4.135325887 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 
--common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.013648e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.832922e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.832922e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.042328 sec -INFO: No Floating Point Exceptions have been reported - 11,410,026,502 cycles # 2.817 GHz - 19,423,165,232 instructions # 1.70 insn per cycle - 4.052144921 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 1,946,666,904 cycles:u # 2.786 GHz (74.38%) + 6,463,698 stalled-cycles-frontend:u # 0.33% frontend cycles idle (75.50%) + 541,991,512 stalled-cycles-backend:u # 27.84% backend cycles idle (76.24%) + 2,053,991,122 instructions:u # 1.06 insn per cycle + # 0.26 stalled cycles per insn (75.26%) + 0.732014876 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.712486e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.267405e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.267405e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.597004 sec -INFO: No Floating Point Exceptions have been reported - 10,166,885,638 cycles # 2.209 GHz - 15,890,691,650 instructions # 1.56 insn per cycle - 4.606740948 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828039868165208E-002 + File "", line 1 + me1=; me2=1.2828039868165208E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 1574c6c3cf..45bc3a09d5 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,200 +1,42 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:19:01 -DATE: 2024-09-15_11:51:49 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.996142e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.634832e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.796610e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.900321 sec -INFO: No Floating Point Exceptions have been reported - 6,197,801,426 cycles # 2.910 GHz - 11,411,789,503 instructions # 1.84 insn per cycle - 2.187318710 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.025883e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.199963e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.199963e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.587430 sec -INFO: No Floating Point Exceptions have been reported - 19,281,256,149 cycles # 
2.928 GHz - 46,192,094,635 instructions # 2.40 insn per cycle - 6.597986195 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.548679e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.019540e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.019540e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.521143 sec -INFO: No Floating Point Exceptions have been reported - 13,278,378,525 cycles # 2.931 GHz - 31,736,760,460 instructions # 2.39 insn per cycle - 4.531295049 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.952371e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.722642e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.722642e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.672885 sec -INFO: No Floating Point Exceptions have been reported - 10,228,932,843 cycles # 2.778 GHz - 19,706,958,837 instructions # 1.93 insn per cycle - 3.682647007 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.003393e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.813864e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.813864e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.587790 sec -INFO: No Floating Point Exceptions have been reported - 10,033,694,863 cycles # 2.790 GHz - 19,370,562,804 instructions # 1.93 insn per cycle - 3.597832664 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 2,109,189,693 cycles:u # 2.839 GHz (75.50%) + 11,350,976 stalled-cycles-frontend:u # 0.54% frontend cycles idle (75.26%) + 553,134,176 stalled-cycles-backend:u # 26.22% backend cycles idle (74.75%) + 2,446,396,221 instructions:u # 1.16 insn per cycle + # 0.23 stalled cycles per insn (74.58%) + 0.768256285 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -202,44 +44,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.709775e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.272417e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.272417e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.131241 sec -INFO: No Floating Point Exceptions have been reported - 8,787,275,470 cycles # 2.123 GHz - 15,836,849,319 instructions # 1.80 insn per cycle - 4.141346630 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828039868165208E-002 + File "", line 1 + me1=; me2=1.2828039868165208E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 3b02782d45..a865a53954 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:03:05 -DATE: 2024-09-15_11:08:35 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.564401e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.700588e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.875805e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.684524 sec -INFO: No Floating Point Exceptions have been reported - 2,669,304,279 cycles # 2.860 GHz - 4,081,785,887 instructions # 1.53 insn per cycle - 0.991787338 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.030249e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.204005e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.204005e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.555775 sec -INFO: No Floating Point Exceptions have been reported - 19,264,579,235 cycles # 2.934 GHz 
- 46,142,725,089 instructions # 2.40 insn per cycle - 6.567398103 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 452) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] 
(23) = ( 1.561230e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.041001e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.041001e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.486484 sec -INFO: No Floating Point Exceptions have been reported - 13,180,735,522 cycles # 2.931 GHz - 31,698,753,932 instructions # 2.40 insn per cycle - 4.497601224 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1649) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.959717e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.742175e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.742175e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.661759 sec -INFO: No Floating Point Exceptions have been reported - 10,296,251,645 cycles # 2.804 GHz - 19,686,624,933 instructions # 1.91 insn per cycle - 3.673145773 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1895) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.002735e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.826033e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.826033e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.593177 sec -INFO: No Floating Point Exceptions have been reported - 
10,082,197,083 cycles # 2.798 GHz - 19,384,360,663 instructions # 1.92 insn per cycle - 3.604587412 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1636) (512y: 178) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception + 790,069,371 cycles:u # 2.227 GHz (75.28%) + 2,433,436 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.64%) + 6,944,825 stalled-cycles-backend:u # 0.88% backend cycles idle (75.52%) + 1,353,780,506 instructions:u # 1.71 insn per cycle + # 0.01 stalled cycles per insn (75.62%) + 0.392071241 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.753760e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.344313e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.344313e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.042847 sec -INFO: No Floating Point Exceptions have been reported - 8,657,274,459 cycles # 2.136 GHz - 15,708,080,882 instructions # 1.81 insn per cycle - 4.054289402 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 834) (512y: 156) (512z: 1237) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828039868165216E-002 + File "", line 1 + me1=; me2=1.2828039868165216E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 9adc226af5..7072c1ce28 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:11:36 -DATE: 2024-09-15_11:33:55 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.203471e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.505439e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.793875e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.692895 sec -INFO: No Floating Point Exceptions have been reported - 2,667,104,303 cycles # 2.870 GHz - 4,197,568,068 instructions # 1.57 insn per cycle - 0.991023921 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.609773e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.065122e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.065122e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.323744 sec -INFO: No Floating Point Exceptions have been reported - 12,680,899,676 cycles # 2.930 GHz 
- 32,573,373,461 instructions # 2.57 insn per cycle - 4.329822925 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 281) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] 
(23) = ( 2.020941e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.881765e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.881765e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.528541 sec -INFO: No Floating Point Exceptions have been reported - 10,343,960,768 cycles # 2.928 GHz - 24,660,363,232 instructions # 2.38 insn per cycle - 3.534351751 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.219408e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.252697e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.252697e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.248651 sec -INFO: No Floating Point Exceptions have been reported - 9,122,079,188 cycles # 2.804 GHz - 16,949,443,243 instructions # 1.86 insn per cycle - 3.254977824 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1616) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.281631e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.372235e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.372235e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.167737 sec -INFO: No Floating Point Exceptions have been reported - 
8,922,630,281 cycles # 2.812 GHz - 16,368,012,425 instructions # 1.83 insn per cycle - 3.174211351 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1352) (512y: 139) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe: Floating point exception + 804,560,077 cycles:u # 2.249 GHz (73.97%) + 2,419,360 stalled-cycles-frontend:u # 0.30% frontend cycles idle (73.78%) + 7,569,528 stalled-cycles-backend:u # 0.94% backend cycles idle (75.57%) + 1,402,555,928 instructions:u # 1.74 insn per cycle + # 0.01 stalled cycles per insn (73.86%) + 0.394680056 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.953386e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.685128e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.685128e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.635921 sec -INFO: No Floating Point Exceptions have been reported - 7,907,839,436 cycles # 2.172 GHz - 14,593,864,068 instructions # 1.85 insn per cycle - 3.642895717 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1003) (512y: 158) (512z: 955) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828039868165208E-002 + File "", line 1 + me1=; me2=1.2828039868165208E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index a111e191c2..b1c8c8b726 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:11:38 -DATE: 2024-09-15_11:34:21 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.369824e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.600320e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.803756e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.688866 sec -INFO: No Floating Point Exceptions have been reported - 2,687,883,365 cycles # 2.879 GHz - 4,137,672,828 instructions # 1.54 insn per cycle - 0.991982760 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.085552e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.924035e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.924035e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.430840 sec -INFO: No Floating Point Exceptions have been reported - 10,018,256,596 cycles # 2.916 GHz 
- 25,507,694,274 instructions # 2.55 insn per cycle - 3.436494229 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 236) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] 
(23) = ( 2.371301e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.639987e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.639987e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.073954 sec -INFO: No Floating Point Exceptions have been reported - 9,025,495,783 cycles # 2.931 GHz - 21,478,170,721 instructions # 2.38 insn per cycle - 3.080490687 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1100) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868164916E-002 -Relative difference = 1.0277102699700292e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.348565e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.529213e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.529213e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.091062 sec -INFO: No Floating Point Exceptions have been reported - 8,721,037,733 cycles # 2.816 GHz - 15,901,191,500 instructions # 1.82 insn per cycle - 3.097416237 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1489) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.428348e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.696607e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.696607e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.003427 sec -INFO: No Floating Point Exceptions have been reported - 
8,472,649,935 cycles # 2.816 GHz - 15,622,192,614 instructions # 1.84 insn per cycle - 3.009695803 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1264) (512y: 141) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe: Floating point exception + 820,698,066 cycles:u # 2.317 GHz (75.60%) + 2,218,614 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.41%) + 6,296,901 stalled-cycles-backend:u # 0.77% backend cycles idle (74.13%) + 1,383,289,627 instructions:u # 1.69 insn per cycle + # 0.00 stalled cycles per insn (74.13%) + 0.391638435 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.053185e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.879921e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.879921e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.481285 sec -INFO: No Floating Point Exceptions have been reported - 7,632,139,448 cycles # 2.189 GHz - 14,304,829,590 instructions # 1.87 insn per cycle - 3.488200715 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1032) (512y: 164) (512z: 877) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828039868165216E-002 + File "", line 1 + me1=; me2=1.2828039868165216E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index bc5233a5ba..4db4cc248b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:03:07 -DATE: 2024-09-15_11:09:06 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.192132e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.336696e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.300693e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.585451 sec -INFO: No Floating Point Exceptions have been reported - 2,336,870,734 cycles # 2.880 GHz - 3,644,936,097 instructions # 1.56 insn per cycle - 0.870296260 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.072651e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.269381e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.269381e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.255061 sec -INFO: No Floating Point Exceptions have been reported - 18,355,246,050 cycles # 2.931 GHz - 
45,043,077,667 instructions # 2.45 insn per cycle - 6.263286658 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) 
= ( 2.241713e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.430745e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.430745e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.200925 sec -INFO: No Floating Point Exceptions have been reported - 9,386,491,422 cycles # 2.926 GHz - 22,329,398,339 instructions # 2.38 insn per cycle - 3.208910381 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.404117e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.697492e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.697492e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.006682 sec -INFO: No Floating Point Exceptions have been reported - 8,484,958,572 cycles # 2.815 GHz - 15,797,352,563 instructions # 1.86 insn per cycle - 3.014624816 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.426806e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.765840e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.765840e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.982618 sec -INFO: No Floating Point Exceptions have been reported - 
8,401,165,701 cycles # 2.811 GHz - 15,653,777,374 instructions # 1.86 insn per cycle - 2.990373231 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 766,476,010 cycles:u # 2.323 GHz (74.78%) + 2,430,705 stalled-cycles-frontend:u # 0.32% frontend cycles idle (75.86%) + 6,550,118 stalled-cycles-backend:u # 0.85% backend cycles idle (76.34%) + 1,341,880,376 instructions:u # 1.75 insn per cycle + # 0.00 stalled cycles per insn (75.25%) + 0.390122279 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.426463e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.722731e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.722731e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.987827 sec -INFO: No Floating Point Exceptions have been reported - 6,753,744,387 cycles # 2.255 GHz - 12,906,211,238 instructions # 1.91 insn per cycle - 2.995926915 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828036060454906E-002 + File "", line 1 + me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 95b8681521..18f7684bf7 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,77 +1,45 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:17:00 -DATE: 2024-09-15_11:45:42 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.076713e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.378449e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.378449e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.944337 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,200,765,179 cycles # 2.831 GHz - 10,073,714,089 instructions # 1.62 insn per cycle - 2.274311561 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 2,121,384,504 cycles:u # 2.853 GHz (75.66%) + 19,652,587 stalled-cycles-frontend:u # 0.93% frontend cycles idle (74.90%) + 551,623,680 stalled-cycles-backend:u # 26.00% backend cycles idle (74.38%) + 2,447,211,339 instructions:u # 1.15 insn per cycle + # 0.23 stalled cycles per insn (74.70%) + 0.779212203 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -79,184 +47,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.013177e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.196081e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.196081e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.753252 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 19,140,260,721 cycles # 2.851 GHz - 45,281,984,182 instructions # 2.37 insn per cycle - 6.770979415 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.075255e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.101362e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.101362e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.587325 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,243,661,246 cycles # 2.856 GHz - 23,736,113,257 instructions # 2.32 insn per cycle - 3.601313820 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.208611e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.299964e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.299964e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.407535 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 9,325,104,909 cycles # 2.739 GHz - 16,992,883,294 instructions # 1.82 insn per cycle - 3.420829574 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.223475e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.357947e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.357947e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.391442 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 9,279,675,392 cycles # 2.737 GHz - 16,862,711,706 instructions # 1.82 insn per cycle - 3.405369019 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.268012e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.359028e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.359028e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.328677 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 7,543,875,114 cycles # 2.260 GHz - 14,180,005,728 instructions # 1.88 insn per cycle - 3.340981281 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828036060454906E-002 + File "", line 1 + me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 15fa7d3112..e386e5b810 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,198 +1,41 @@ -Building in 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:19:37 -DATE: 2024-09-15_11:58:00 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.336762e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.510278e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.479828e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.311031 sec -INFO: No Floating Point Exceptions have been reported - 4,430,467,531 cycles # 2.889 GHz - 6,960,795,222 instructions # 1.57 insn per cycle - 1.590735457 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.075223e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.270769e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270769e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.687798 sec -INFO: No Floating Point Exceptions have been reported - 19,629,378,634 cycles # 2.933 GHz - 45,588,143,016 instructions # 2.32 insn per cycle - 6.694819566 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.248280e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.433718e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.433718e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.634348 sec -INFO: No Floating Point Exceptions have been reported - 10,674,006,930 cycles # 2.932 GHz - 22,771,305,471 instructions # 2.13 insn per cycle - 3.641620548 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.377112e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.647798e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.647798e+06 ) sec^-1 
-MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.480978 sec -INFO: No Floating Point Exceptions have been reported - 9,770,888,729 cycles # 2.802 GHz - 16,055,948,307 instructions # 1.64 insn per cycle - 3.487814115 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 
--common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.421430e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.770475e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.770475e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.434581 sec -INFO: No Floating Point Exceptions have been reported - 9,740,474,003 cycles # 2.831 GHz - 15,722,386,015 instructions # 1.61 insn per cycle - 3.441655213 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 1,910,318,117 cycles:u # 2.899 GHz (75.03%) + 14,858,846 stalled-cycles-frontend:u # 0.78% frontend cycles idle (76.42%) + 549,077,433 stalled-cycles-backend:u # 28.74% backend cycles idle (75.40%) + 2,015,487,344 instructions:u # 1.06 insn per cycle + # 0.27 stalled cycles per insn (75.11%) + 0.688448405 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.441505e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.731463e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.731463e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.419856 sec -INFO: No Floating Point Exceptions have been reported - 8,031,724,309 cycles # 2.344 GHz - 12,960,768,751 instructions # 1.61 insn per cycle - 3.427508239 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828036060454906E-002 + File "", line 1 + me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index 6589d6b6fa..116cdbf3e7 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,200 +1,42 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:19:03 -DATE: 2024-09-15_11:52:21 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.003631e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.641927e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.671119e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.488823 sec -INFO: No Floating Point Exceptions have been reported - 4,967,613,517 cycles # 2.899 GHz - 9,171,831,308 instructions # 1.85 insn per cycle - 1.769936667 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.073262e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.267974e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.267974e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.250612 sec -INFO: No Floating Point Exceptions have been reported - 18,322,110,309 cycles # 
2.929 GHz - 45,051,388,062 instructions # 2.46 insn per cycle - 6.257775360 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 2.247339e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.424409e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.424409e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.190677 sec -INFO: No Floating Point Exceptions have been reported - 9,359,387,266 cycles # 2.928 GHz - 22,331,498,291 instructions # 2.39 insn per cycle - 3.197654484 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.363785e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.679448e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.679448e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.047896 sec -INFO: No Floating Point Exceptions have been reported - 8,583,132,130 cycles # 2.811 GHz - 15,806,350,534 instructions # 1.84 insn per cycle - 3.054826008 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.430638e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.771124e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.771124e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.974287 sec -INFO: No Floating Point Exceptions have been reported - 8,401,059,167 cycles # 2.818 GHz - 15,651,581,046 instructions # 1.86 insn per cycle - 2.981875735 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 2,081,336,434 cycles:u # 2.902 GHz (75.47%) + 20,574,722 stalled-cycles-frontend:u # 0.99% frontend cycles idle (75.72%) + 556,746,417 stalled-cycles-backend:u # 26.75% backend cycles idle (75.25%) + 2,423,831,594 instructions:u # 1.16 insn per cycle + # 0.23 stalled cycles per insn (74.31%) + 0.741698760 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -202,44 +44,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.438697e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.736996e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.736996e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.973534 sec -INFO: No Floating Point Exceptions have been reported - 6,722,109,548 cycles # 2.256 GHz - 12,906,606,049 instructions # 1.92 insn per cycle - 2.981153680 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828036060454906E-002 + File "", line 1 + me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index bed528f6e7..ff44ec4aeb 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:03:09 -DATE: 2024-09-15_11:09:33 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.185244e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.645179e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.802907e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.581560 sec -INFO: No Floating Point Exceptions have been reported - 2,321,713,730 cycles # 2.869 GHz - 3,648,873,879 instructions # 1.57 insn per cycle - 0.865547071 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.067642e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.262437e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.262437e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.278398 sec -INFO: No Floating Point Exceptions have been reported - 18,394,901,899 cycles # 2.927 GHz - 
45,013,341,285 instructions # 2.45 insn per cycle - 6.286516700 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 397) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) 
= ( 2.249815e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.432877e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.432877e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.188261 sec -INFO: No Floating Point Exceptions have been reported - 9,382,779,388 cycles # 2.937 GHz - 22,291,184,899 instructions # 2.38 insn per cycle - 3.196123670 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1939) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.394804e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.683014e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.683014e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.016616 sec -INFO: No Floating Point Exceptions have been reported - 8,501,260,075 cycles # 2.812 GHz - 15,791,303,131 instructions # 1.86 insn per cycle - 3.024850695 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2539) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.433401e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.784502e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.784502e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.973425 sec -INFO: No Floating Point Exceptions have been reported - 
8,414,276,106 cycles # 2.823 GHz - 15,633,261,481 instructions # 1.86 insn per cycle - 2.981340876 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2436) (512y: 12) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception + 764,123,086 cycles:u # 2.338 GHz (74.12%) + 2,314,510 stalled-cycles-frontend:u # 0.30% frontend cycles idle (73.68%) + 6,276,540 stalled-cycles-backend:u # 0.82% backend cycles idle (74.96%) + 1,379,052,675 instructions:u # 1.80 insn per cycle + # 0.00 stalled cycles per insn (75.94%) + 0.364419040 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.449856e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.750896e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.750896e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.962493 sec -INFO: No Floating Point Exceptions have been reported - 6,702,761,235 cycles # 2.257 GHz - 12,885,740,598 instructions # 1.92 insn per cycle - 2.970728824 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1667) (512y: 18) (512z: 1428) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052564145764E-002 -Relative difference = 1.9988585667912256e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828036060454906E-002 + File "", line 1 + me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 711fbf3a50..f9b9273f50 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:11:41 -DATE: 2024-09-15_11:34:45 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.272057e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.453757e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.411368e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.577320 sec -INFO: No Floating Point Exceptions have been reported - 2,322,367,280 cycles # 2.881 GHz - 3,616,476,077 instructions # 1.56 insn per cycle - 0.862621614 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.635657e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.129532e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.129532e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 4.218771 sec -INFO: No Floating Point Exceptions have been reported - 12,191,913,623 cycles # 2.887 GHz - 
32,293,306,178 instructions # 2.65 insn per cycle - 4.224304323 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 290) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039840314887E-002 -Relative difference = 1.244813035273009e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = 
( 2.654215e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.464911e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.464911e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.746425 sec -INFO: No Floating Point Exceptions have been reported - 8,013,864,577 cycles # 2.914 GHz - 18,725,751,725 instructions # 2.34 insn per cycle - 2.751635696 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1548) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039283704129E-002 -Relative difference = 5.583829420356249e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.734762e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.516819e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.516819e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.676523 sec -INFO: No Floating Point Exceptions have been reported - 7,476,186,846 cycles # 2.791 GHz - 14,257,923,546 instructions # 1.91 insn per cycle - 2.682062632 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053244447801E-002 -Relative difference = 2.5291823782248813e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.834242e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.778618e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.778618e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.592675 sec -INFO: No Floating Point Exceptions have been reported - 
7,344,696,907 cycles # 2.828 GHz - 13,952,931,831 instructions # 1.90 insn per cycle - 2.598198803 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2087) (512y: 3) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe: Floating point exception + 791,028,835 cycles:u # 2.408 GHz (73.43%) + 2,343,117 stalled-cycles-frontend:u # 0.30% frontend cycles idle (73.58%) + 7,158,894 stalled-cycles-backend:u # 0.91% backend cycles idle (75.68%) + 1,370,401,779 instructions:u # 1.73 insn per cycle + # 0.01 stalled cycles per insn (75.79%) + 0.365233728 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053244447801E-002 -Relative difference = 2.5291823782248813e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.491060e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.875896e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.875896e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.903149 sec -INFO: No Floating Point Exceptions have been reported - 6,571,286,194 cycles # 2.260 GHz - 13,433,545,963 instructions # 2.04 insn per cycle - 2.908820313 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2077) (512y: 1) (512z: 1199) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052562326775E-002 -Relative difference = 1.997440588685788e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828036060454906E-002 + File "", line 1 + me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 6fc527ffa1..5031f9b51b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:11:43 -DATE: 2024-09-15_11:35:09 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.289380e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.618768e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.817817e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.580301 sec -INFO: No Floating Point Exceptions have been reported - 2,323,629,755 cycles # 2.848 GHz - 3,593,641,981 instructions # 1.55 insn per cycle - 0.873274895 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.209448e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.220446e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.220446e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.220192 sec -INFO: No Floating Point Exceptions have been reported - 9,366,774,041 cycles # 2.905 GHz - 
25,702,432,609 instructions # 2.74 insn per cycle - 3.225730639 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 243) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039838495897E-002 -Relative difference = 1.2589928273811243e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) 
= ( 3.014104e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.557363e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.557363e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.462960 sec -INFO: No Floating Point Exceptions have been reported - 7,216,847,131 cycles # 2.925 GHz - 16,891,846,951 instructions # 2.34 insn per cycle - 2.468502980 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1350) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.924187e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.020326e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.020326e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.522608 sec -INFO: No Floating Point Exceptions have been reported - 7,150,122,380 cycles # 2.829 GHz - 13,633,449,373 instructions # 1.91 insn per cycle - 2.528205937 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2061) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053220800939E-002 -Relative difference = 2.5107486628541925e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.976818e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.175866e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.175866e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.485267 sec -INFO: No Floating Point Exceptions have been reported - 
7,047,642,186 cycles # 2.830 GHz - 13,442,931,038 instructions # 1.91 insn per cycle - 2.490839699 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1940) (512y: 4) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe: Floating point exception + 771,335,849 cycles:u # 2.360 GHz (75.28%) + 2,446,075 stalled-cycles-frontend:u # 0.32% frontend cycles idle (74.46%) + 7,240,951 stalled-cycles-backend:u # 0.94% backend cycles idle (75.56%) + 1,402,589,103 instructions:u # 1.82 insn per cycle + # 0.01 stalled cycles per insn (73.98%) + 0.368335826 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053220800939E-002 -Relative difference = 2.5107486628541925e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.604837e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.103202e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.103202e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.790744 sec -INFO: No Floating Point Exceptions have been reported - 6,349,721,778 cycles # 2.272 GHz - 13,164,680,615 instructions # 2.07 insn per cycle - 2.796235299 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2033) (512y: 1) (512z: 1085) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052536860923E-002 -Relative difference = 1.977588895209662e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828036060454906E-002 + File "", line 1 + me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index caa67d1a4c..eb2412c3dc 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:03:12 -DATE: 2024-09-15_11:10:00 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.610039e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.567106e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.762593e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.688497 sec -INFO: No Floating Point Exceptions have been reported - 2,665,329,968 cycles # 2.845 GHz - 4,055,682,627 instructions # 1.52 insn per cycle - 0.995154695 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590279E-002 -Relative difference = 7.671454200650844e-09 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.002881e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.168467e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.168467e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.726627 sec -INFO: No Floating Point Exceptions have been reported - 19,724,365,546 cycles # 2.928 GHz - 
46,388,641,620 instructions # 2.35 insn per cycle - 6.737968541 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039952548879E-002 -Relative difference = 3.6990156841838714e-09 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) 
= ( 1.617185e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.143896e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.143896e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.347629 sec -INFO: No Floating Point Exceptions have been reported - 12,771,945,524 cycles # 2.931 GHz - 31,577,972,239 instructions # 2.47 insn per cycle - 4.359278192 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1719) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039952548879E-002 -Relative difference = 3.6990156841838714e-09 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.943978e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.720569e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.720569e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.688531 sec -INFO: No Floating Point Exceptions have been reported - 10,322,037,008 cycles # 2.790 GHz - 19,570,801,424 instructions # 1.90 insn per cycle - 3.699996508 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2042) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.986657e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.789520e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.789520e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.618212 sec -INFO: No Floating Point Exceptions have been reported - 
10,149,499,266 cycles # 2.797 GHz - 19,312,096,557 instructions # 1.90 insn per cycle - 3.629679706 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1785) (512y: 189) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception + 801,797,045 cycles:u # 2.250 GHz (75.44%) + 2,260,290 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.18%) + 8,048,661 stalled-cycles-backend:u # 1.00% backend cycles idle (75.37%) + 1,409,315,454 instructions:u # 1.76 insn per cycle + # 0.01 stalled cycles per insn (73.94%) + 0.396528391 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.777391e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.385382e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.385382e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.992684 sec -INFO: No Floating Point Exceptions have been reported - 8,588,251,503 cycles # 2.146 GHz - 15,161,251,122 instructions # 1.77 insn per cycle - 4.003805537 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 954) (512y: 154) (512z: 1322) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828039901590281E-002 + File "", line 1 + me1=; me2=1.2828039901590281E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index ce1b16067d..7523130c83 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-17_09:03:14 -DATE: 2024-09-15_11:10:30 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.695377e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.640031e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.828039e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.676129 sec -INFO: No Floating Point Exceptions have been reported - 2,632,225,960 cycles # 2.883 GHz - 4,132,384,248 instructions # 1.57 insn per cycle - 0.970493332 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590279E-002 -Relative difference = 7.671454200650844e-09 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.005676e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.176899e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.176899e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.735465 sec -INFO: No Floating Point Exceptions have been reported - 19,720,225,593 cycles # 2.924 GHz - 
46,326,489,596 instructions # 2.35 insn per cycle - 6.746197968 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039952548879E-002 -Relative difference = 3.6990156841838714e-09 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) 
= ( 1.574820e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.144081e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.144081e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.449866 sec -INFO: No Floating Point Exceptions have been reported - 13,065,779,841 cycles # 2.930 GHz - 31,555,443,434 instructions # 2.42 insn per cycle - 4.460852067 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1711) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039952548879E-002 -Relative difference = 3.6990156841838714e-09 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.952135e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.730440e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.730440e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.677079 sec -INFO: No Floating Point Exceptions have been reported - 10,320,566,663 cycles # 2.800 GHz - 19,557,785,526 instructions # 1.90 insn per cycle - 3.688245631 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2026) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.981919e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.782784e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.782784e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.626089 sec -INFO: No Floating Point Exceptions have been reported - 
10,150,645,903 cycles # 2.793 GHz - 19,388,040,023 instructions # 1.91 insn per cycle - 3.637342012 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1779) (512y: 189) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception + 800,037,746 cycles:u # 2.261 GHz (74.89%) + 2,538,107 stalled-cycles-frontend:u # 0.32% frontend cycles idle (74.80%) + 6,828,914 stalled-cycles-backend:u # 0.85% backend cycles idle (75.63%) + 1,342,253,081 instructions:u # 1.68 insn per cycle + # 0.01 stalled cycles per insn (74.78%) + 0.391601204 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.806136e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.449559e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.449559e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.931921 sec -INFO: No Floating Point Exceptions have been reported - 8,442,748,276 cycles # 2.150 GHz - 15,068,523,446 instructions # 1.78 insn per cycle - 3.943167549 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 947) (512y: 156) (512z: 1306) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.2828039901590284E-002 + File "", line 1 + me1=; me2=1.2828039901590284E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index aeadfaae64..11cb6d94bd 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:03:16 -DATE: 2024-09-15_11:11:01 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.391981e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.330443e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.949573e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.534465 sec -INFO: No Floating Point Exceptions have been 
reported - 2,210,485,527 cycles # 2.869 GHz - 3,136,829,588 instructions # 1.42 insn per cycle - 0.828432932 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.818281e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.865529e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.865529e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.911600 sec -INFO: No Floating Point Exceptions have been reported - 17,389,649,504 cycles # 2.935 GHz - 46,036,709,188 instructions # 2.65 insn per cycle - 5.925127688 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.165855e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.325075e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.325075e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.453922 sec -INFO: No Floating Point Exceptions have been reported - 10,171,046,914 cycles # 2.936 GHz - 27,937,548,503 instructions # 2.75 insn per cycle - 3.465600263 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515654 -Relative difference = 3.2588039900609506e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.967598e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.358232e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.358232e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 
) GeV^0 -TOTAL : 2.252729 sec -INFO: No Floating Point Exceptions have been reported - 6,219,848,194 cycles # 2.748 GHz - 12,677,070,824 instructions # 2.04 insn per cycle - 2.263945260 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.478036e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.948706e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.948706e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.055945 sec -INFO: No Floating Point Exceptions have been reported - 5,693,440,562 cycles # 2.756 GHz - 12,116,317,958 instructions # 2.13 insn per cycle - 2.067013514 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 869,576,390 cycles:u # 0.583 GHz (75.13%) + 2,440,335 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.50%) + 6,206,852 stalled-cycles-backend:u # 0.71% backend cycles idle (75.25%) + 1,476,984,592 instructions:u # 1.70 insn per cycle + # 0.00 stalled cycles per insn (74.77%) + 1.545981711 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.483396e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.667202e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.667202e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.151048 sec -INFO: No Floating Point Exceptions have been reported - 5,836,401,977 cycles # 1.846 GHz - 8,391,475,751 instructions # 1.44 insn per cycle - 3.162234928 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288063388516817 + File "", line 1 + me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 9022013b0c..9c9e966a48 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,77 +1,45 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:17:03 -DATE: 2024-09-15_11:46:12 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.381047e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.782856e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.782856e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.840023 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,108,426,192 cycles # 2.861 GHz - 4,770,924,698 instructions # 1.53 insn per cycle - 1.146594198 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 2,249,015,642 cycles:u # 2.777 GHz (74.62%) + 11,048,040 stalled-cycles-frontend:u # 0.49% frontend cycles idle (74.43%) + 556,416,400 stalled-cycles-backend:u # 24.74% backend cycles idle (74.13%) + 2,529,763,771 instructions:u # 1.12 insn per cycle + # 0.22 stalled cycles per insn (74.78%) + 0.846028883 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -79,184 +47,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.806159e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.852377e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.852377e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 6.067320 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 17,856,813,573 cycles # 2.936 GHz - 46,243,571,751 instructions # 2.59 insn per cycle - 6.083398130 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.134236e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.289007e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.289007e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.611222 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,640,342,598 cycles # 2.934 GHz - 28,274,377,614 instructions # 2.66 insn per cycle - 3.627839941 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515654 -Relative difference = 3.2588039900609506e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.918119e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.295386e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.295386e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.398079 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,685,318,792 cycles # 2.770 GHz - 13,122,453,026 instructions # 1.96 insn per cycle - 2.414568983 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.370026e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.821205e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.821205e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.218649 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,189,711,182 cycles # 2.770 GHz - 12,557,371,407 instructions # 2.03 insn per cycle - 2.235322482 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.469847e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.650575e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.650575e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.290950 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,320,487,640 cycles # 1.913 GHz - 8,791,643,966 instructions # 1.39 insn per cycle - 3.307886654 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288063388516817 + File "", line 1 + me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 85f95aac4c..ac00831180 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y 
(was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:19:39 -DATE: 2024-09-15_11:58:30 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.424217e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.466051e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.011391e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.646400 sec -INFO: No Floating Point Exceptions have been reported - 2,528,248,856 cycles # 2.879 GHz - 3,688,196,917 instructions # 1.46 insn per cycle - 0.934986871 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.819736e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.867152e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.867152e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.985044 sec -INFO: No Floating Point Exceptions have been reported - 17,600,111,411 cycles # 2.936 GHz - 46,124,554,790 instructions # 2.62 insn per cycle - 5.995774241 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.168097e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.326675e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.326675e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.533047 sec -INFO: No Floating Point Exceptions have been reported - 10,405,596,477 cycles # 2.937 GHz - 28,016,084,485 instructions # 2.69 insn per cycle - 3.543840924 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515654 -Relative difference = 3.2588039900609506e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.000915e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.392912e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.392912e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 
3.402993e-03 ) GeV^0 -TOTAL : 2.319991 sec -INFO: No Floating Point Exceptions have been reported - 6,443,899,577 cycles # 2.766 GHz - 12,743,367,354 instructions # 1.98 insn per cycle - 2.330731733 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.453355e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.917864e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.917864e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.148765 sec -INFO: No Floating Point Exceptions have been reported - 5,965,699,512 cycles # 2.764 GHz - 12,146,978,501 instructions # 2.04 insn per cycle - 2.159407986 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 1,995,561,443 cycles:u # 2.801 GHz (74.88%) + 6,510,105 stalled-cycles-frontend:u # 0.33% frontend cycles idle (74.93%) + 549,438,216 stalled-cycles-backend:u # 27.53% backend cycles idle (74.45%) + 2,170,651,447 instructions:u # 1.09 insn per cycle + # 0.25 stalled cycles per insn (75.43%) + 0.742750781 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.499391e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.683844e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.683844e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.221377 sec -INFO: No Floating Point Exceptions have been reported - 6,080,803,082 cycles # 1.882 GHz - 8,423,087,351 instructions # 1.39 insn per cycle - 3.232264502 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288063388516817 + File "", line 1 + me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index fa08fbada3..15956dbce8 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,200 +1,42 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:19:06 -DATE: 2024-09-15_11:52:49 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.726431e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.401767e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.003069e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.719407 sec -INFO: No Floating Point Exceptions have been reported - 2,718,962,196 cycles # 2.853 GHz - 4,261,744,999 instructions # 1.57 insn per cycle - 1.009279183 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.804236e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.851320e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.851320e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.958188 sec -INFO: No Floating Point Exceptions have been reported - 17,386,557,790 cycles # 2.923 GHz - 46,053,036,463 
instructions # 2.65 insn per cycle - 5.968882862 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.150158e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.308857e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.308857e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.477756 sec -INFO: No Floating Point Exceptions have been reported - 10,179,732,087 cycles # 2.919 GHz - 27,956,952,229 instructions # 2.75 insn per cycle - 3.488776572 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515654 -Relative difference = 3.2588039900609506e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.931728e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.313640e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.313640e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.272950 sec -INFO: No Floating Point Exceptions have been reported - 6,250,280,118 cycles # 2.738 GHz - 12,699,256,189 instructions # 2.03 insn per cycle - 2.283692354 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.454076e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.923276e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.923276e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.069149 sec -INFO: No Floating Point Exceptions have been reported - 5,725,799,884 cycles # 2.754 GHz - 12,135,179,967 instructions # 2.12 insn per cycle - 2.079783939 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 2,195,815,390 cycles:u # 2.880 GHz (75.56%) + 10,892,913 stalled-cycles-frontend:u # 0.50% frontend cycles idle (75.02%) + 547,003,488 stalled-cycles-backend:u # 24.91% backend cycles idle (74.82%) + 2,513,157,300 instructions:u # 1.14 insn per cycle + # 0.22 stalled cycles per insn (74.55%) + 0.788757141 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -202,44 +44,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.414550e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.592343e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.592343e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.218110 sec -INFO: No Floating Point Exceptions have been reported - 5,959,079,250 cycles # 1.847 GHz - 8,422,189,176 instructions # 1.41 insn per cycle - 3.228977501 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288063388516817 + File "", line 1 + me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 328467ef63..c0bd5870cd 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:03:20 -DATE: 2024-09-15_11:11:27 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.368553e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.328924e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.965685e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.531897 sec -INFO: No Floating Point Exceptions have been 
reported - 2,204,349,816 cycles # 2.864 GHz - 3,169,634,690 instructions # 1.44 insn per cycle - 0.825649601 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.870268e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.919754e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.919754e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.735202 sec -INFO: No Floating Point Exceptions have been reported - 16,847,713,247 cycles # 2.933 GHz - 44,981,738,957 instructions # 2.67 insn per cycle - 5.744748484 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.300705e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.473658e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.473658e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.304655 sec -INFO: No Floating Point Exceptions have been reported - 9,659,083,497 cycles # 2.916 GHz - 26,749,720,361 instructions # 2.77 insn per cycle - 3.314062418 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2328) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515654 -Relative difference = 3.2588039900609506e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.583161e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.907950e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.907950e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 
) GeV^0 -TOTAL : 2.415079 sec -INFO: No Floating Point Exceptions have been reported - 6,675,336,151 cycles # 2.753 GHz - 14,174,925,457 instructions # 2.12 insn per cycle - 2.425332683 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2710) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.674600e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.017717e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.017717e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.378233 sec -INFO: No Floating Point Exceptions have been reported - 6,574,155,578 cycles # 2.754 GHz - 13,789,180,928 instructions # 2.10 insn per cycle - 2.388565062 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2356) (512y: 297) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception + 897,157,021 cycles:u # 2.392 GHz (75.81%) + 2,363,003 stalled-cycles-frontend:u # 0.26% frontend cycles idle (76.35%) + 5,504,469 stalled-cycles-backend:u # 0.61% backend cycles idle (74.94%) + 1,378,212,077 instructions:u # 1.54 insn per cycle + # 0.00 stalled cycles per insn (74.78%) + 0.411268664 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.383516e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.554686e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.554686e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.229855 sec -INFO: No Floating Point Exceptions have been reported - 5,994,389,719 cycles # 1.851 GHz - 10,123,629,860 instructions # 1.69 insn per cycle - 3.240029027 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1273) (512y: 208) (512z: 1988) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288063388516817 + File "", line 1 + me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 2da881e2b3..42bc47a590 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:11:45 -DATE: 2024-09-15_11:35:31 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.302471e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.316570e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.001729e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.535501 sec -INFO: No Floating Point Exceptions have been 
reported - 2,215,078,191 cycles # 2.874 GHz - 3,154,679,095 instructions # 1.42 insn per cycle - 0.829146554 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.351910e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.430033e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.430033e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.571525 sec -INFO: No Floating Point Exceptions have been reported - 13,035,401,690 cycles # 2.848 GHz - 34,355,905,973 instructions # 2.64 insn per cycle - 4.578322526 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.974205e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.110138e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.110138e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.639722 sec -INFO: No Floating Point Exceptions have been reported - 10,720,308,622 cycles # 2.941 GHz - 24,027,850,859 instructions # 2.24 insn per cycle - 3.646936507 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515654 -Relative difference = 3.2588039900609506e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.622097e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.949479e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.949479e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 
) GeV^0 -TOTAL : 2.385273 sec -INFO: No Floating Point Exceptions have been reported - 6,607,425,584 cycles # 2.762 GHz - 12,368,604,074 instructions # 1.87 insn per cycle - 2.392729796 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3103) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.850394e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.302393e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.302393e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.279711 sec -INFO: No Floating Point Exceptions have been reported - 6,291,615,830 cycles # 2.752 GHz - 11,595,311,145 instructions # 1.84 insn per cycle - 2.287442889 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2648) (512y: 239) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe: Floating point exception + 913,485,140 cycles:u # 2.425 GHz (74.81%) + 2,415,774 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.94%) + 6,504,578 stalled-cycles-backend:u # 0.71% backend cycles idle (74.55%) + 1,437,465,825 instructions:u # 1.57 insn per cycle + # 0.00 stalled cycles per insn (72.92%) + 0.415170761 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.743982e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.952525e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.952525e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.921108 sec -INFO: No Floating Point Exceptions have been reported - 5,423,773,794 cycles # 1.852 GHz - 9,310,782,229 instructions # 1.72 insn per cycle - 2.929084188 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2096) (512y: 282) (512z: 1955) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288063388516817 + File "", line 1 + me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 86df224c90..998edbd61e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:11:48 -DATE: 2024-09-15_11:35:55 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.255241e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.245059e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.949681e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.533151 sec -INFO: No Floating Point Exceptions have been 
reported - 2,204,451,822 cycles # 2.864 GHz - 3,124,206,943 instructions # 1.42 insn per cycle - 0.826948420 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.566402e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.659710e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.659710e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.199423 sec -INFO: No Floating Point Exceptions have been reported - 12,339,181,649 cycles # 2.934 GHz - 34,922,451,175 instructions # 2.83 insn per cycle - 4.206438418 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 430) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515649 -Relative difference = 3.258803992249869e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.977481e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.113067e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.113067e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.637534 sec -INFO: No Floating Point Exceptions have been reported - 10,710,920,290 cycles # 2.939 GHz - 23,032,620,692 instructions # 2.15 insn per cycle - 3.644897421 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2340) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388515654 -Relative difference = 3.2588039900609506e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.909385e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.279214e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.279214e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 
) GeV^0 -TOTAL : 2.252873 sec -INFO: No Floating Point Exceptions have been reported - 6,212,002,997 cycles # 2.749 GHz - 11,978,645,016 instructions # 1.93 insn per cycle - 2.260347594 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2491) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.039032e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.423530e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.423530e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.196002 sec -INFO: No Floating Point Exceptions have been reported - 6,062,556,643 cycles # 2.753 GHz - 11,146,456,018 instructions # 1.84 insn per cycle - 2.203425956 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2103) (512y: 174) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe: Floating point exception + 894,868,548 cycles:u # 2.368 GHz (73.84%) + 2,559,865 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.80%) + 6,911,754 stalled-cycles-backend:u # 0.77% backend cycles idle (73.68%) + 1,409,508,267 instructions:u # 1.58 insn per cycle + # 0.00 stalled cycles per insn (74.06%) + 0.415505974 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.871031e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.091641e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.091641e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.826553 sec -INFO: No Floating Point Exceptions have been reported - 5,265,704,314 cycles # 1.859 GHz - 9,046,022,125 instructions # 1.72 insn per cycle - 2.834187629 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1632) (512y: 208) (512z: 1571) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288063388516817 + File "", line 1 + me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index e1d11759a7..23c1c53fc2 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:03:22 -DATE: 2024-09-15_11:11:52 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.179768e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.708203e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.827426e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.491224 sec -INFO: No Floating Point Exceptions have been 
reported - 2,056,714,115 cycles # 2.865 GHz - 2,916,773,309 instructions # 1.42 insn per cycle - 0.776029796 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.918838e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.972832e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.972832e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.552302 sec -INFO: No Floating Point Exceptions have been reported - 16,247,282,670 cycles # 2.924 GHz - 45,328,928,537 instructions # 2.79 insn per cycle - 5.557963082 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.529514e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.866293e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.866293e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.402922 sec -INFO: No Floating Point Exceptions have been reported - 7,055,912,070 cycles # 2.931 GHz - 17,768,218,222 instructions # 2.52 insn per cycle - 2.408607319 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.299822e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.410195e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.410195e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) 
GeV^0 -TOTAL : 1.350007 sec -INFO: No Floating Point Exceptions have been reported - 3,747,874,250 cycles # 2.767 GHz - 8,260,976,747 instructions # 2.20 insn per cycle - 1.355686963 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.794382e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.005480e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.005480e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.278305 sec -INFO: No Floating Point Exceptions have been reported - 3,550,706,297 cycles # 2.767 GHz - 7,915,681,558 instructions # 2.23 insn per cycle - 1.284036639 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 805,132,482 cycles:u # 2.412 GHz (74.89%) + 2,287,780 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.96%) + 6,669,480 stalled-cycles-backend:u # 0.83% backend cycles idle (76.06%) + 1,358,609,559 instructions:u # 1.69 insn per cycle + # 0.00 stalled cycles per insn (76.47%) + 0.372925518 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.489307e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.134354e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.134354e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.705925 sec -INFO: No Floating Point Exceptions have been reported - 3,272,576,419 cycles # 1.913 GHz - 6,103,138,487 instructions # 1.86 insn per cycle - 1.712010321 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288173687877133 + File "", line 1 + me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 4785fec175..bdaeefcf25 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,77 +1,45 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:17:06 -DATE: 2024-09-15_11:46:39 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.985126e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.401873e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.401873e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.682896 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,622,267,762 cycles # 2.874 GHz - 4,057,326,622 instructions # 1.55 insn per cycle - 0.970912293 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 2,132,023,795 cycles:u # 2.859 GHz (75.45%) + 19,229,872 stalled-cycles-frontend:u # 0.90% frontend cycles idle (75.68%) + 542,899,446 stalled-cycles-backend:u # 25.46% backend cycles idle (75.08%) + 2,476,029,988 instructions:u # 1.16 insn per cycle + # 0.22 stalled cycles per insn (74.93%) + 0.780368458 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -79,184 +47,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.900194e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.953379e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.953379e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.681942 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 16,671,667,536 cycles # 2.929 GHz - 45,497,192,820 instructions # 2.73 insn per cycle - 5.692941265 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.487658e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.829841e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.829841e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.506047 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 7,392,710,973 cycles # 2.938 GHz - 18,162,302,699 instructions # 2.46 insn per cycle - 2.517172183 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.160433e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.262111e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.262111e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.454756 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,077,667,192 cycles # 2.783 GHz - 8,611,395,195 instructions # 2.11 insn per cycle - 1.466232607 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.644244e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.901065e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.901065e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.382676 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,878,507,500 cycles # 2.784 GHz - 8,265,873,907 instructions # 2.13 insn per cycle - 1.393862906 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.387022e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.022226e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.022226e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.814510 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,594,832,163 cycles # 1.970 GHz - 6,462,220,806 instructions # 1.80 insn per cycle - 1.825958297 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288173687877133 + File "", line 1 + me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index ff1a0d1a39..6e8a5eba1a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y 
(was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:19:42 -DATE: 2024-09-15_11:58:55 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.402978e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.823028e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.958083e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.595970 sec -INFO: No Floating Point Exceptions have been reported - 2,351,354,548 cycles # 2.876 GHz - 3,444,506,671 instructions # 1.46 insn per cycle - 0.875001238 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.915967e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.970379e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.970379e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 5.649395 sec -INFO: No Floating Point Exceptions have been reported - 16,580,294,518 cycles # 2.931 GHz - 45,471,304,380 instructions # 2.74 insn per cycle - 5.657307639 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.539990e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.880360e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.880360e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.485589 sec -INFO: No Floating Point Exceptions have been reported - 7,332,359,110 cycles # 2.941 GHz - 17,888,678,821 instructions # 2.44 insn per cycle - 2.493593708 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.267444e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.414148e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.414148e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 
3.404207e-03 ) GeV^0 -TOTAL : 1.445011 sec -INFO: No Floating Point Exceptions have been reported - 4,026,136,405 cycles # 2.773 GHz - 8,355,233,205 instructions # 2.08 insn per cycle - 1.452615562 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.729960e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.002290e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.002290e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.378766 sec -INFO: No Floating Point Exceptions have been reported - 3,842,420,700 cycles # 2.773 GHz - 7,976,068,469 instructions # 2.08 insn per cycle - 1.386498519 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 1,936,285,217 cycles:u # 2.897 GHz (74.84%) + 15,064,538 stalled-cycles-frontend:u # 0.78% frontend cycles idle (74.88%) + 545,299,393 stalled-cycles-backend:u # 28.16% backend cycles idle (75.04%) + 2,093,072,887 instructions:u # 1.08 insn per cycle + # 0.26 stalled cycles per insn (74.42%) + 0.700962558 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.464492e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.118413e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.118413e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.801449 sec -INFO: No Floating Point Exceptions have been reported - 3,544,493,993 cycles # 1.960 GHz - 6,155,712,678 instructions # 1.74 insn per cycle - 1.809068044 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288173687877133 + File "", line 1 + me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index a7d3a3bcad..c3fba2dc3b 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,200 +1,42 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:19:09 -DATE: 2024-09-15_11:53:15 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.603732e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.769962e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.882903e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.632677 sec -INFO: No Floating Point Exceptions have been reported - 2,459,588,784 cycles # 2.873 GHz - 3,830,411,115 instructions # 1.56 insn per cycle - 0.912203024 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.920423e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.975843e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.975843e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.564298 sec -INFO: No Floating Point Exceptions have been reported - 16,322,815,770 cycles # 2.930 GHz - 45,379,862,622 
instructions # 2.78 insn per cycle - 5.571986399 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.531426e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 4.870727e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.870727e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.417527 sec -INFO: No Floating Point Exceptions have been reported - 7,119,927,533 cycles # 2.937 GHz - 17,819,194,741 instructions # 2.50 insn per cycle - 2.425172051 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.302342e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.447707e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.447707e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.366211 sec -INFO: No Floating Point Exceptions have been reported - 3,809,953,710 cycles # 2.774 GHz - 8,311,255,796 instructions # 2.18 insn per cycle - 1.374030854 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.793036e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.010397e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.010397e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.299199 sec -INFO: No Floating Point Exceptions have been reported - 3,629,527,941 cycles # 2.777 GHz - 7,964,563,950 instructions # 2.19 insn per cycle - 1.308355970 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 2,119,458,360 cycles:u # 2.954 GHz (74.36%) + 20,115,822 stalled-cycles-frontend:u # 0.95% frontend cycles idle (75.34%) + 552,306,849 stalled-cycles-backend:u # 26.06% backend cycles idle (75.48%) + 2,469,159,789 instructions:u # 1.16 insn per cycle + # 0.22 stalled cycles per insn (75.30%) + 0.742367314 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -202,44 +44,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.490800e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.161305e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.161305e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.720915 sec -INFO: No Floating Point Exceptions have been reported - 3,334,705,650 cycles # 1.929 GHz - 6,144,839,228 instructions # 1.84 insn per cycle - 1.729518385 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288173687877133 + File "", line 1 + me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 1a7a19dcfe..22666b8d44 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:03:24 -DATE: 2024-09-15_11:12:13 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.207213e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.745203e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.857795e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.487469 sec -INFO: No Floating Point Exceptions have been 
reported - 2,053,225,612 cycles # 2.869 GHz - 2,963,841,312 instructions # 1.44 insn per cycle - 0.771871582 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.964254e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.021824e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.021824e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.438732 sec -INFO: No Floating Point Exceptions have been reported - 16,004,474,289 cycles # 2.939 GHz - 44,480,990,455 instructions # 2.78 insn per cycle - 5.447104045 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 536) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.270854e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.739248e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.739248e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.093047 sec -INFO: No Floating Point Exceptions have been reported - 6,146,902,561 cycles # 2.927 GHz - 17,124,330,277 instructions # 2.79 insn per cycle - 2.101358753 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2864) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.010634e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.590643e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.590643e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) 
GeV^0 -TOTAL : 1.845256 sec -INFO: No Floating Point Exceptions have been reported - 5,092,053,198 cycles # 2.749 GHz - 10,266,716,383 instructions # 2.02 insn per cycle - 1.853336006 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3907) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.078233e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.679535e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.679535e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.828978 sec -INFO: No Floating Point Exceptions have been reported - 5,049,904,876 cycles # 2.749 GHz - 10,046,122,437 instructions # 1.99 insn per cycle - 1.837563375 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3806) (512y: 2) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception + 776,657,888 cycles:u # 2.335 GHz (75.89%) + 2,351,642 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.98%) + 12,095,370 stalled-cycles-backend:u # 1.56% backend cycles idle (76.30%) + 1,353,218,013 instructions:u # 1.74 insn per cycle + # 0.01 stalled cycles per insn (73.68%) + 0.372378571 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.649805e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.982891e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.982891e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.359308 sec -INFO: No Floating Point Exceptions have been reported - 4,441,902,341 cycles # 1.877 GHz - 8,494,262,942 instructions # 1.91 insn per cycle - 2.367884804 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2746) (512y: 4) (512z: 2754) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288173687877133 + File "", line 1 + me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 7bcb20b104..807729e577 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:11:50 -DATE: 2024-09-15_11:36:18 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.281805e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.729157e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.845967e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.487987 sec -INFO: No Floating Point Exceptions have been 
reported - 2,055,281,728 cycles # 2.872 GHz - 2,956,266,509 instructions # 1.44 insn per cycle - 0.772297546 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.496211e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.588163e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.588163e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.287732 sec -INFO: No Floating Point Exceptions have been reported - 12,584,199,997 cycles # 2.932 GHz - 34,606,962,286 instructions # 2.75 insn per cycle - 4.293417398 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199094356969 -Relative difference = 4.463890496342449e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.317872e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.783743e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.783743e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.058410 sec -INFO: No Floating Point Exceptions have been reported - 6,058,288,486 cycles # 2.936 GHz - 14,847,536,122 instructions # 2.45 insn per cycle - 2.064093895 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2980) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193414453417 -Relative difference = 1.6829758681196702e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.131335e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.950373e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.950373e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) 
GeV^0 -TOTAL : 1.556955 sec -INFO: No Floating Point Exceptions have been reported - 4,316,973,163 cycles # 2.764 GHz - 9,053,302,579 instructions # 2.10 insn per cycle - 1.562583378 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4460) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181974319741 -Relative difference = 9.731379272303266e-08 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.308917e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.178890e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.178890e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.521257 sec -INFO: No Floating Point Exceptions have been reported - 4,205,210,775 cycles # 2.756 GHz - 8,662,511,141 instructions # 2.06 insn per cycle - 1.526851661 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4225) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe: Floating point exception + 795,837,222 cycles:u # 2.403 GHz (73.91%) + 2,265,369 stalled-cycles-frontend:u # 0.28% frontend cycles idle (73.67%) + 7,926,590 stalled-cycles-backend:u # 1.00% backend cycles idle (74.48%) + 1,417,497,207 instructions:u # 1.78 insn per cycle + # 0.01 stalled cycles per insn (74.54%) + 0.369757999 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181974319741 -Relative difference = 9.731379272303266e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.372555e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.809988e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.809988e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.038768 sec -INFO: No Floating Point Exceptions have been reported - 3,837,026,814 cycles # 1.878 GHz - 7,805,330,859 instructions # 2.03 insn per cycle - 2.044464874 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4273) (512y: 0) (512z: 2558) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183246739209 -Relative difference = 1.6003107281264138e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288173687877133 + File "", line 1 + me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index 69afb6ef9f..7866be57c1 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:11:52 -DATE: 2024-09-15_11:36:38 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.211144e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.722563e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.857000e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.487617 sec -INFO: No Floating Point Exceptions have been 
reported - 2,060,698,152 cycles # 2.879 GHz - 2,912,783,409 instructions # 1.41 insn per cycle - 0.772686787 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.666171e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.771083e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.771083e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.019199 sec -INFO: No Floating Point Exceptions have been reported - 11,827,953,010 cycles # 2.940 GHz - 35,076,444,454 instructions # 2.97 insn per cycle - 4.024833106 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199094356969 -Relative difference = 4.463890496342449e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.415928e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.902704e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.902704e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.023749 sec -INFO: No Floating Point Exceptions have been reported - 5,953,573,213 cycles # 2.935 GHz - 14,468,346,196 instructions # 2.43 insn per cycle - 2.029398775 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2559) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193583255634 -Relative difference = 1.7661780742548925e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.382613e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.271116e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.271116e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) 
GeV^0 -TOTAL : 1.507601 sec -INFO: No Floating Point Exceptions have been reported - 4,170,609,924 cycles # 2.758 GHz - 8,881,070,721 instructions # 2.13 insn per cycle - 1.513291878 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3570) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182104704902 -Relative difference = 1.0374044905426431e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.441367e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.356283e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.356283e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.495607 sec -INFO: No Floating Point Exceptions have been reported - 4,129,531,699 cycles # 2.752 GHz - 8,406,651,679 instructions # 2.04 insn per cycle - 1.501280641 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3296) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe: Floating point exception + 783,007,068 cycles:u # 2.350 GHz (76.69%) + 2,438,873 stalled-cycles-frontend:u # 0.31% frontend cycles idle (76.02%) + 6,261,316 stalled-cycles-backend:u # 0.80% backend cycles idle (76.46%) + 1,322,368,341 instructions:u # 1.69 insn per cycle + # 0.00 stalled cycles per insn (75.14%) + 0.371799349 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182104704902 -Relative difference = 1.0374044905426431e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.439343e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.891063e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.891063e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.014863 sec -INFO: No Floating Point Exceptions have been reported - 3,794,076,081 cycles # 1.879 GHz - 7,699,347,303 instructions # 2.03 insn per cycle - 2.020593600 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 2110) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183204829693 -Relative difference = 1.5796536184903122e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288173687877133 + File "", line 1 + me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 5fcfefd8b1..fc42016bc7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:03:27 -DATE: 2024-09-15_11:12:35 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.360888e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.282446e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.948861e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.535947 sec -INFO: No Floating Point Exceptions have been 
reported - 2,206,968,400 cycles # 2.863 GHz - 3,177,366,447 instructions # 1.44 insn per cycle - 0.829346124 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243874 -Relative difference = 3.241686432649386e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.801369e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.847904e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.847904e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.967015 sec -INFO: No Floating Point Exceptions have been reported - 17,524,709,788 cycles # 2.932 GHz - 46,191,860,900 instructions # 2.64 insn per cycle - 5.978935443 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063903750300 -Relative difference = 3.0048445715164216e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.140467e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.299172e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.299172e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.487825 sec -INFO: No Floating Point Exceptions have been reported - 10,261,674,067 cycles # 2.934 GHz - 27,722,537,189 instructions # 2.70 insn per cycle - 3.498978005 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063903750300 -Relative difference = 3.0048445715164216e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.032137e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.431710e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.431710e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) 
GeV^0 -TOTAL : 2.231910 sec -INFO: No Floating Point Exceptions have been reported - 6,175,976,175 cycles # 2.753 GHz - 12,601,670,185 instructions # 2.04 insn per cycle - 2.244594220 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2774) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.534616e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.015525e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.015525e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.042072 sec -INFO: No Floating Point Exceptions have been reported - 5,669,805,165 cycles # 2.764 GHz - 12,036,562,183 instructions # 2.12 insn per cycle - 2.054181107 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2519) (512y: 146) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception + 895,738,310 cycles:u # 2.385 GHz (75.30%) + 2,284,852 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.56%) + 5,964,182 stalled-cycles-backend:u # 0.67% backend cycles idle (75.25%) + 1,435,614,704 instructions:u # 1.60 insn per cycle + # 0.00 stalled cycles per insn (74.89%) + 0.411223145 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.587962e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.783361e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.783361e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.070999 sec -INFO: No Floating Point Exceptions have been reported - 5,754,052,644 cycles # 1.867 GHz - 8,225,264,257 instructions # 1.43 insn per cycle - 3.082911381 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1863) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288063423243869 + File "", line 1 + me1=; me2=2.0288063423243869; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index f3ccad1744..ee77467769 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:03:29 -DATE: 2024-09-15_11:13:00 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.403841e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.350229e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.960040e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.532145 sec -INFO: No Floating Point Exceptions have been 
reported - 2,202,225,096 cycles # 2.860 GHz - 3,140,327,784 instructions # 1.43 insn per cycle - 0.826897706 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243874 -Relative difference = 3.241686432649386e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.849779e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.898891e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.898891e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.812131 sec -INFO: No Floating Point Exceptions have been reported - 17,080,273,912 cycles # 2.934 GHz - 45,215,696,703 instructions # 2.65 insn per cycle - 5.823642020 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063903750300 -Relative difference = 3.0048445715164216e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.353337e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.532203e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.532203e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.272970 sec -INFO: No Floating Point Exceptions have been reported - 9,622,203,648 cycles # 2.930 GHz - 26,352,115,115 instructions # 2.74 insn per cycle - 3.284795843 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063903750300 -Relative difference = 3.0048445715164216e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.499939e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.814164e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.814164e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) 
GeV^0 -TOTAL : 2.448542 sec -INFO: No Floating Point Exceptions have been reported - 6,760,703,277 cycles # 2.754 GHz - 14,051,302,777 instructions # 2.08 insn per cycle - 2.455916079 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2896) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.725371e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.062901e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.062901e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.336085 sec -INFO: No Floating Point Exceptions have been reported - 6,433,871,158 cycles # 2.746 GHz - 13,544,684,713 instructions # 2.11 insn per cycle - 2.343643276 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2535) (512y: 302) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception + 914,849,134 cycles:u # 2.436 GHz (74.09%) + 2,451,232 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.59%) + 10,512,149 stalled-cycles-backend:u # 1.15% backend cycles idle (74.65%) + 1,396,328,721 instructions:u # 1.53 insn per cycle + # 0.01 stalled cycles per insn (74.70%) + 0.411717719 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.546154e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.730746e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.730746e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.074398 sec -INFO: No Floating Point Exceptions have been reported - 5,684,367,008 cycles # 1.845 GHz - 9,231,965,840 instructions # 1.62 insn per cycle - 3.081687192 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2060) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0288063423243869 + File "", line 1 + me1=; me2=2.0288063423243869; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 0fe4cfc922..17030efec2 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-17_09:03:31 -DATE: 2024-09-15_11:13:25 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.646703e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 8.903323e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.008440e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.472515 sec -INFO: No Floating Point Exceptions have been reported - 1,976,395,629 cycles # 2.864 GHz - 2,853,369,004 instructions # 1.44 insn per cycle - 0.746437756 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 795,818,848 cycles:u # 0.778 GHz (75.10%) + 2,347,473 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.35%) + 12,569,594 stalled-cycles-backend:u # 1.58% backend cycles idle (75.12%) + 1,353,252,025 instructions:u # 1.70 insn per cycle + # 0.01 stalled cycles per insn (75.16%) + 1.078805003 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.044065e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.229313e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.240372e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.613454 sec -INFO: No Floating Point Exceptions have been reported - 2,457,306,952 cycles # 2.873 GHz - 3,760,458,763 instructions # 1.53 insn per cycle - 0.914581816 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.423163e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.435086e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.435086e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.782254 sec -INFO: No Floating Point Exceptions have been reported - 19,933,068,888 cycles # 2.938 GHz - 59,910,639,029 instructions # 3.01 insn per cycle - 6.786428407 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 2,017,653,309 cycles:u # 2.808 GHz (75.55%) + 2,464,363 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.50%) + 10,357,340 stalled-cycles-backend:u # 0.51% backend cycles idle (75.54%) + 2,432,704,139 instructions:u # 1.21 insn per cycle + # 0.00 stalled cycles per insn (74.82%) + 0.755151771 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.574256e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.615519e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.615519e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.601510 sec -INFO: No Floating Point Exceptions have been reported - 10,564,249,920 cycles # 2.931 GHz - 31,083,049,027 instructions # 2.94 insn per cycle - 3.605720194 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.109445e+04 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.271688e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.271688e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.818340 sec -INFO: No Floating Point Exceptions have been reported - 4,995,758,651 cycles # 2.742 GHz - 11,404,411,821 instructions # 2.28 insn per cycle - 1.822613950 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.031962e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.052518e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.052518e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.607425 sec -INFO: No Floating Point Exceptions have been reported - 4,440,179,427 cycles # 2.756 GHz - 10,663,032,994 instructions # 2.40 insn per cycle - 1.611644858 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 91) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.095641e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.193407e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.193407e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.331432 sec -INFO: No Floating Point Exceptions have been reported - 4,128,663,715 cycles # 1.768 GHz - 5,965,561,050 instructions # 1.44 insn per cycle - 2.335809030 seconds time elapsed -=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 1617) (512y: 95) (512z: 3577) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.4131213684418644 + File "", line 1 + me1=; me2=1.4131213684418644; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ 
+SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 555f99fae8..40c6e9f5ef 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,167 +1,59 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-17_09:17:09 -DATE: 2024-09-15_11:47:00 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.462205e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.092748e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.092748e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.504368 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,053,925,551 cycles # 2.862 GHz - 3,120,835,610 instructions # 1.52 insn per cycle - 0.775288198 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 828,494,905 cycles:u # 2.504 GHz (76.67%) + 2,589,636 stalled-cycles-frontend:u # 0.31% frontend cycles idle (76.65%) + 21,646,845 stalled-cycles-backend:u # 2.61% backend cycles idle (75.46%) + 1,335,042,676 instructions:u # 1.61 insn per cycle + # 0.02 stalled cycles per insn (75.25%) + 0.364821007 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.695563e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.383097e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.383097e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.834031 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,122,984,951 cycles # 2.884 GHz - 5,028,895,726 instructions # 1.61 insn per cycle - 1.144542739 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.420974e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.433351e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.433351e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.797187 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 19,924,578,475 cycles # 2.930 GHz - 59,919,807,490 instructions # 3.01 insn per cycle - 6.801426045 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 4,077,163,579 cycles:u # 3.002 GHz (75.22%) + 16,959,476 stalled-cycles-frontend:u # 0.42% frontend cycles idle (75.01%) + 837,378,519 stalled-cycles-backend:u # 20.54% backend cycles idle (75.14%) + 4,205,434,861 instructions:u # 1.03 insn per cycle + # 0.20 stalled cycles per insn (74.97%) + 1.391743442 seconds time elapsed ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.519993e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.562146e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.562146e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.653302 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,746,732,815 cycles # 2.939 GHz - 31,134,499,346 instructions # 2.90 insn per cycle - 3.657616586 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -169,114 +61,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.065515e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.233179e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.233179e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.835810 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,036,150,788 cycles # 2.739 GHz - 11,457,434,104 instructions # 2.28 insn per cycle - 1.839969686 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.012086e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.033024e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.033024e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.647583 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,499,476,819 cycles # 2.725 GHz - 10,716,818,624 instructions # 2.38 insn per cycle - 1.651828196 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 91) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.046506e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.146814e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.146814e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.355729 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,171,753,505 cycles # 1.769 GHz - 6,006,835,350 instructions # 1.44 insn per cycle - 2.359914843 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1617) (512y: 95) (512z: 3577) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.4131213684418644 + File "", line 1 + me1=; me2=1.4131213684418644; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index df418c0c55..28ea2b77d0 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,117 +1,51 @@ -Building in 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-17_09:03:35 -DATE: 2024-09-15_11:13:51 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.819441e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.940165e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.036270e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.473171 sec 
-INFO: No Floating Point Exceptions have been reported - 1,973,905,564 cycles # 2.864 GHz - 2,835,389,936 instructions # 1.44 insn per cycle - 0.747859769 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception + 790,067,314 cycles:u # 2.455 GHz (75.50%) + 2,353,075 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.86%) + 9,910,058 stalled-cycles-backend:u # 1.25% backend cycles idle (74.28%) + 1,334,713,958 instructions:u # 1.69 insn per cycle + # 0.01 stalled cycles per insn (75.12%) + 0.360079549 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.045923e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.239053e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.249723e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.613723 sec -INFO: No Floating Point Exceptions have been reported - 2,468,512,324 cycles # 2.879 GHz - 3,722,507,305 instructions # 1.51 insn per cycle - 0.915283019 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.416240e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.428350e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.428350e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.802073 sec -INFO: No Floating Point Exceptions have been reported - 19,919,234,926 cycles # 2.929 GHz - 60,126,857,831 instructions # 3.02 insn per cycle - 6.806341598 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1322) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception + 1,997,776,639 cycles:u # 2.793 GHz (75.32%) + 2,591,438 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.42%) + 5,068,348 stalled-cycles-backend:u # 0.25% backend cycles idle (74.82%) + 2,425,917,284 instructions:u # 1.21 insn per cycle + # 0.00 stalled cycles per insn (74.31%) + 0.753671044 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.628115e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.671006e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.671006e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.560395 sec -INFO: No Floating Point Exceptions have been reported - 10,470,027,689 cycles # 2.938 GHz - 30,685,175,745 instructions # 2.93 insn per cycle - 3.564357324 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 5047) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.858005e+04 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.013532e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.013532e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.870006 sec -INFO: No Floating Point Exceptions have been reported - 5,129,037,452 cycles # 2.738 GHz - 11,838,972,708 instructions # 2.31 insn per cycle - 1.873874088 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4748) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.652883e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.834044e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.834044e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.717936 sec -INFO: No Floating Point Exceptions have been reported - 4,726,163,144 cycles # 2.747 GHz - 11,165,051,323 instructions # 2.36 insn per cycle - 1.721718897 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4403) (512y: 245) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.029308e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.126499e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.126499e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.353551 sec -INFO: No Floating Point Exceptions have been reported - 4,165,348,623 cycles # 1.768 GHz - 6,220,012,480 instructions # 1.49 insn per cycle - 2.357450464 seconds time elapsed -=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 1513) (512y: 140) (512z: 3679) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.4131213684418644 + File "", line 1 + me1=; me2=1.4131213684418644; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ 
+SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 0344b19ae4..5c62f736c5 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-17_09:03:39 -DATE: 2024-09-15_11:14:17 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.690436e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.002102e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.037933e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.455498 sec -INFO: No Floating Point Exceptions have been reported - 1,928,242,310 cycles # 2.870 GHz - 2,746,045,826 instructions # 1.42 insn per cycle - 0.728814382 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 795,425,366 cycles:u # 2.535 GHz (75.09%) + 2,323,507 stalled-cycles-frontend:u 
# 0.29% frontend cycles idle (74.76%) + 12,833,150 stalled-cycles-backend:u # 1.61% backend cycles idle (74.54%) + 1,318,876,921 instructions:u # 1.66 insn per cycle + # 0.01 stalled cycles per insn (74.18%) + 0.348861399 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.680469e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.378510e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.424836e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 -TOTAL : 0.509889 sec -INFO: No Floating Point Exceptions have been reported - 2,121,031,452 cycles # 2.862 GHz - 3,036,959,694 instructions # 1.43 insn per cycle - 0.800065199 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412607e+00 -Avg ME (F77/GPU) = 1.4132214305330990 -Relative difference = 0.0004349621183379836 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.504687e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.517708e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.517708e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.560247 sec -INFO: No Floating Point Exceptions have been reported - 19,257,464,373 cycles # 2.934 GHz - 59,612,594,917 instructions # 3.10 insn per cycle - 6.564375492 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 1,357,401,223 cycles:u # 2.729 GHz (74.51%) + 2,360,040 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.82%) + 5,861,415 stalled-cycles-backend:u # 0.43% backend cycles idle (75.00%) + 1,880,028,115 instructions:u # 1.39 insn per cycle + # 0.00 stalled cycles per insn (75.21%) + 0.534139334 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129949096991936 -Relative difference = 6.390737857384068e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.084277e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.218488e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.218488e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 2.043801 sec -INFO: No Floating Point Exceptions have been reported - 6,009,096,977 cycles # 2.936 GHz - 17,060,655,087 instructions # 2.84 insn per cycle - 2.047534449 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129954647353316 -Relative difference = 3.2890090308261873e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.741509e+05 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.801920e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.801920e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.958794 sec -INFO: No Floating Point Exceptions have been reported - 2,632,796,186 cycles # 2.737 GHz - 6,187,347,650 instructions # 2.35 insn per cycle - 0.962496439 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.912269e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.986419e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.986419e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.875039 sec -INFO: No Floating Point Exceptions have been reported - 2,407,469,182 cycles # 2.742 GHz - 5,790,784,602 instructions # 2.41 insn per cycle - 0.878768885 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4908) (512y: 36) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.443174e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.485583e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.485583e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.155478 sec -INFO: No Floating Point Exceptions have been reported - 2,073,615,836 cycles # 1.790 GHz - 3,391,178,624 instructions # 1.64 insn per cycle - 1.159306518 seconds time elapsed -=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 2235) (512y: 39) (512z: 3789) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.4131674300257941 + File "", line 1 + me1=; me2=1.4131674300257941; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ 
+SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 93fdf05be3..6aff05df93 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,167 +1,59 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-17_09:17:12 -DATE: 2024-09-15_11:47:26 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.480682e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.545762e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.545762e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.469335 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,971,009,268 cycles # 2.861 GHz - 2,878,621,667 instructions # 1.46 insn per cycle - 0.747088963 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 819,166,315 cycles:u # 2.563 GHz (75.32%) + 2,697,447 stalled-cycles-frontend:u # 0.33% frontend cycles idle (74.78%) + 27,322,515 stalled-cycles-backend:u # 3.34% backend cycles idle (74.60%) + 1,319,678,705 instructions:u # 1.61 insn per cycle + # 0.02 stalled cycles per insn (73.07%) + 0.352508429 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.503732e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.296845e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.296845e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.737499e+02 +- 4.776369e+02 ) GeV^-2 -TOTAL : 0.653804 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,546,264,462 cycles # 2.875 GHz - 3,884,000,523 instructions # 1.53 insn per cycle - 0.944357505 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412607e+00 -Avg ME (F77/GPU) = 1.4132214305330990 -Relative difference = 0.0004349621183379836 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.503119e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.516211e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.516211e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.568401 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 19,288,292,034 cycles # 2.936 GHz - 59,615,397,281 instructions # 3.09 insn per cycle - 6.572330246 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 3,382,949,380 cycles:u # 3.026 GHz (74.85%) + 16,737,570 stalled-cycles-frontend:u # 0.49% frontend cycles idle (74.97%) + 830,499,979 stalled-cycles-backend:u # 24.55% backend cycles idle (74.44%) + 3,669,566,461 instructions:u # 1.08 insn per cycle + # 0.23 stalled cycles per insn (74.74%) + 1.149279792 seconds time elapsed ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129949096991936 -Relative difference = 6.390737857384068e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.074244e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.211619e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.211619e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 2.051717 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,032,564,255 cycles # 2.936 GHz - 17,108,905,426 instructions # 2.84 insn per cycle - 2.055577630 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -169,114 +61,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129954647353316 -Relative difference = 3.2890090308261873e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.735664e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.796597e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.796597e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.965943 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,652,269,080 cycles # 2.742 GHz - 6,224,274,753 instructions # 2.35 insn per cycle - 0.969710233 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.901208e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.974602e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.974602e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.883566 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,424,412,243 cycles # 2.734 GHz - 5,827,930,388 instructions # 2.40 insn per cycle - 0.887425140 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4908) (512y: 36) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.438672e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.481527e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.481527e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.163548 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,093,109,210 cycles # 1.794 GHz - 3,432,132,802 instructions # 1.64 insn per cycle - 1.167531871 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2235) (512y: 39) (512z: 3789) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.4131674300257941 + File "", line 1 + me1=; me2=1.4131674300257941; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 04fc107fbb..d98d10b469 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,117 +1,51 @@ -Building in 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-17_09:03:41 -DATE: 2024-09-15_11:14:38 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.693098e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.040838e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.074373e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.456865 sec 
-INFO: No Floating Point Exceptions have been reported - 1,922,600,682 cycles # 2.857 GHz - 2,735,771,538 instructions # 1.42 insn per cycle - 0.729676644 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception + 804,704,967 cycles:u # 2.580 GHz (74.99%) + 2,485,868 stalled-cycles-frontend:u # 0.31% frontend cycles idle (74.03%) + 5,581,551 stalled-cycles-backend:u # 0.69% backend cycles idle (74.61%) + 1,289,552,140 instructions:u # 1.60 insn per cycle + # 0.00 stalled cycles per insn (74.72%) + 0.349270365 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.681790e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.366127e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.406124e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 -TOTAL : 0.509422 sec -INFO: No Floating Point Exceptions have been reported - 2,117,565,229 cycles # 2.869 GHz - 3,056,275,302 instructions # 1.44 insn per cycle - 0.796290434 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412607e+00 -Avg ME (F77/GPU) = 1.4132214305330990 -Relative difference = 0.0004349621183379836 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.491972e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.504565e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.504565e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.593721 sec -INFO: No Floating Point Exceptions have been reported - 19,401,378,848 cycles # 2.941 GHz - 59,351,233,195 instructions # 3.06 insn per cycle - 6.597810534 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception + 1,338,950,680 cycles:u # 2.709 GHz (74.38%) + 2,324,316 stalled-cycles-frontend:u # 0.17% frontend cycles idle (76.38%) + 10,977,568 stalled-cycles-backend:u # 0.82% backend cycles idle (76.62%) + 1,846,343,229 instructions:u # 1.38 insn per cycle + # 0.01 stalled cycles per insn (75.41%) + 0.529564954 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129949096991936 -Relative difference = 6.390737857384068e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.427450e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.574205e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.574205e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.961524 sec -INFO: No Floating Point Exceptions have been reported - 5,763,417,063 cycles # 2.934 GHz - 16,848,552,420 instructions # 2.92 insn per cycle - 1.965663621 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 5611) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129954647353316 -Relative difference = 3.2890090308261873e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.513418e+05 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.559668e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.559668e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.100420 sec -INFO: No Floating Point Exceptions have been reported - 3,014,454,268 cycles # 2.733 GHz - 6,847,622,992 instructions # 2.27 insn per cycle - 1.104094178 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5735) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.641246e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.695747e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.695747e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.015943 sec -INFO: No Floating Point Exceptions have been reported - 2,793,517,683 cycles # 2.742 GHz - 6,436,907,448 instructions # 2.30 insn per cycle - 1.019630864 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5509) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.322600e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.358678e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.358678e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.258576 sec -INFO: No Floating Point Exceptions have been reported - 2,248,626,373 cycles # 1.783 GHz - 3,754,168,834 instructions # 1.67 insn per cycle - 1.262333902 seconds time elapsed -=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 2466) (512y: 29) (512z: 4084) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.4131674300257941 + File "", line 1 + me1=; me2=1.4131674300257941; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ 
+SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 2641b6a6f8..67b9b653dc 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-17_09:03:44 -DATE: 2024-09-15_11:14:59 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.553984e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.813684e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.925982e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.473727 sec -INFO: No Floating Point Exceptions have been reported - 1,964,125,217 cycles # 2.842 GHz - 2,850,802,933 instructions # 1.45 insn per cycle - 0.747533169 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception + 816,747,087 cycles:u # 2.541 GHz (74.32%) + 2,217,663 stalled-cycles-frontend:u 
# 0.27% frontend cycles idle (77.46%) + 7,439,488 stalled-cycles-backend:u # 0.91% backend cycles idle (76.82%) + 1,328,789,320 instructions:u # 1.63 insn per cycle + # 0.01 stalled cycles per insn (76.44%) + 0.361184807 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.039046e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.224514e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.235497e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.623745 sec -INFO: No Floating Point Exceptions have been reported - 2,491,113,981 cycles # 2.884 GHz - 3,741,355,868 instructions # 1.50 insn per cycle - 0.924633822 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569487 -Relative difference = 4.418889885423659e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.390069e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.401784e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.401784e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.876043 sec -INFO: No Floating Point Exceptions have been reported - 20,176,006,103 cycles # 2.934 GHz - 60,944,588,650 instructions # 3.02 insn per cycle - 6.880217907 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1220) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception + 2,050,860,031 cycles:u # 2.855 GHz (75.25%) + 2,539,060 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.60%) + 5,619,619 stalled-cycles-backend:u # 0.27% backend cycles idle (75.72%) + 2,421,380,911 instructions:u # 1.18 insn per cycle + # 0.00 stalled cycles per insn (74.15%) + 0.757114524 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213859069593 -Relative difference = 4.345647726386255e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.624106e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.667122e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.667122e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.563355 sec -INFO: No Floating Point Exceptions have been reported - 10,467,283,500 cycles # 2.935 GHz - 30,820,693,493 instructions # 2.94 insn per cycle - 3.567171047 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 5351) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213792564823 -Relative difference = 4.392710025734405e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.172379e+04 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.336577e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.336577e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.806966 sec -INFO: No Floating Point Exceptions have been reported - 4,954,879,411 cycles # 2.737 GHz - 11,359,422,816 instructions # 2.29 insn per cycle - 1.810872816 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4776) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.036379e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.057513e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.057513e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.601666 sec -INFO: No Floating Point Exceptions have been reported - 4,380,983,099 cycles # 2.729 GHz - 10,610,165,712 instructions # 2.42 insn per cycle - 1.605990710 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4503) (512y: 83) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.900446e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.995461e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.995461e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.397174 sec -INFO: No Floating Point Exceptions have been reported - 4,245,323,919 cycles # 1.769 GHz - 6,166,210,089 instructions # 1.45 insn per cycle - 2.401100901 seconds time elapsed -=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 2140) (512y: 117) (512z: 3653) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213786174055 -Relative difference = 4.3972324717191576e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.4131213755569483 + File "", line 1 + me1=; me2=1.4131213755569483; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ 
+SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 0766319c3b..31382f9560 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-17_09:03:48 -DATE: 2024-09-15_11:15:25 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.556212e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.906743e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.026143e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.472306 sec -INFO: No Floating Point Exceptions have been reported - 1,982,520,983 cycles # 2.874 GHz - 2,863,074,866 instructions # 1.44 insn per cycle - 0.745845869 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception + 805,702,716 cycles:u # 2.517 GHz (75.98%) + 2,417,935 stalled-cycles-frontend:u 
# 0.30% frontend cycles idle (75.25%) + 11,480,390 stalled-cycles-backend:u # 1.42% backend cycles idle (74.63%) + 1,356,931,070 instructions:u # 1.68 insn per cycle + # 0.01 stalled cycles per insn (74.03%) + 0.357191421 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.042838e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.231665e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.242155e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.611235 sec -INFO: No Floating Point Exceptions have been reported - 2,455,829,243 cycles # 2.879 GHz - 3,741,729,771 instructions # 1.52 insn per cycle - 0.912428146 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569487 -Relative difference = 4.418889885423659e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.386940e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.398394e+04 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.398394e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.884140 sec -INFO: No Floating Point Exceptions have been reported - 20,272,568,697 cycles # 2.944 GHz - 61,168,730,148 instructions # 3.02 insn per cycle - 6.888274413 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1272) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception + 2,030,800,576 cycles:u # 2.836 GHz (75.61%) + 2,533,232 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.12%) + 10,284,240 stalled-cycles-backend:u # 0.51% backend cycles idle (74.94%) + 2,396,848,084 instructions:u # 1.18 insn per cycle + # 0.00 stalled cycles per insn (74.52%) + 0.752414920 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213859069593 -Relative difference = 4.345647726386255e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.669440e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.713215e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.713215e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.529000 sec -INFO: No Floating Point Exceptions have been reported - 10,335,535,502 cycles # 2.926 GHz - 30,533,410,675 instructions # 2.95 insn per cycle - 3.532867905 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 5155) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213792564823 -Relative difference = 4.392710025734405e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.803371e+04 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.957146e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.957146e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.880842 sec -INFO: No Floating Point Exceptions have been reported - 5,141,108,977 cycles # 2.729 GHz - 11,871,626,607 instructions # 2.31 insn per cycle - 1.885060685 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4887) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.734351e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.920231e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.920231e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.703697 sec -INFO: No Floating Point Exceptions have been reported - 4,677,605,202 cycles # 2.740 GHz - 11,166,557,237 instructions # 2.39 insn per cycle - 1.707597039 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4508) (512y: 238) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.863155e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.956338e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.956338e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.409053 sec -INFO: No Floating Point Exceptions have been reported - 4,255,960,621 cycles # 1.764 GHz - 6,404,237,522 instructions # 1.50 insn per cycle - 2.413297760 seconds time elapsed -=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 2036) (512y: 163) (512z: 3731) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213786174055 -Relative difference = 4.3972324717191576e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 1.4131213755569483 + File "", line 1 + me1=; me2=1.4131213755569483; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ 
+SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 5f3726dcea..aaa3cb9985 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:03:51 -DATE: 2024-09-15_11:15:51 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.308012e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.334511e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.336215e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.533591 sec -INFO: No Floating Point Exceptions have been reported - 2,205,805,938 cycles # 2.869 GHz - 3,444,884,387 instructions # 1.56 insn per cycle - 0.825307966 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 1,672,827,040 cycles:u # 1.274 GHz (74.29%) + 2,536,395 
stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.97%) + 5,501,869 stalled-cycles-backend:u # 0.33% backend cycles idle (75.35%) + 2,005,314,113 instructions:u # 1.20 insn per cycle + # 0.00 stalled cycles per insn (75.22%) + 1.370037399 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.131073e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.161174e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.162405e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.048991 sec -INFO: No Floating Point Exceptions have been reported - 9,688,028,273 cycles # 2.924 GHz - 22,036,541,373 instructions # 2.27 insn per cycle - 3.369850770 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.879946e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 1.880868e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.880868e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.730075 sec -INFO: No Floating Point Exceptions have been reported - 25,643,153,835 cycles # 2.937 GHz - 78,954,437,611 instructions # 3.08 insn per cycle - 8.734432118 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 22,771,879,189 cycles:u # 3.429 GHz (74.99%) + 3,632,595 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.98%) + 7,645,494 stalled-cycles-backend:u # 0.03% backend cycles idle (75.00%) + 20,219,629,853 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (75.06%) + 6.681399663 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141133E-004 -Relative difference = 2.8372990776517314e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.520374e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.523613e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.523613e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.665332 sec -INFO: No Floating Point Exceptions have been reported - 13,099,128,105 cycles # 2.806 GHz - 39,559,591,481 instructions # 3.02 insn per cycle - 4.669271517 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141122E-004 -Relative difference = 2.837299079287849e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 8.059011e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.075081e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.075081e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.042413 sec -INFO: No Floating Point Exceptions have been reported - 5,610,747,752 cycles # 2.743 GHz - 13,824,504,616 instructions # 2.46 insn per cycle - 2.046398223 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.162703e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.184308e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.184308e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.796726 sec -INFO: No Floating Point Exceptions have been reported - 4,922,237,700 cycles # 2.735 GHz - 12,506,994,545 instructions # 2.54 insn per cycle - 1.800589813 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.982168e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.994583e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.994583e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.355452 sec -INFO: No Floating Point Exceptions have been reported - 4,140,123,386 cycles # 1.756 GHz - 6,390,153,387 instructions # 1.54 insn per cycle - 2.359734916 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6266731198158101E-004 + File "", line 1 + me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 
5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 1d93db579b..49fa73b6a5 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,167 +1,59 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:17:29 -DATE: 2024-09-15_11:48:14 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.969430e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.268357e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.268357e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.523555 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,166,841,175 cycles # 2.867 GHz - 3,453,451,458 instructions # 1.59 insn per cycle - 0.814918597 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 1,681,929,144 cycles:u # 2.927 GHz (75.10%) + 3,388,305 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.26%) + 47,046,516 stalled-cycles-backend:u # 2.80% backend cycles idle (75.35%) + 2,098,819,459 instructions:u # 1.25 insn per cycle + # 0.02 stalled cycles per insn (74.67%) + 0.608519815 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.613032e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.091578e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.091578e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.317499 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,422,723,136 cycles # 2.898 GHz - 15,879,167,379 instructions # 1.52 insn per cycle - 3.658545225 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.878765e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.879684e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.879684e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.741583 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 25,666,397,830 cycles # 2.935 GHz - 78,965,262,045 instructions # 3.08 insn per cycle - 8.745862119 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 25,532,544,588 cycles:u # 3.408 GHz (75.00%) + 39,137,847 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.02%) + 1,119,010,439 stalled-cycles-backend:u # 4.38% backend cycles idle (75.00%) + 22,646,170,640 instructions:u # 0.89 insn per cycle + # 0.05 stalled cycles per insn (75.03%) + 7.526076810 seconds time elapsed ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141133E-004 -Relative difference = 2.8372990776517314e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.560977e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.564368e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.564368e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.617447 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 13,088,026,122 cycles # 2.833 GHz - 39,572,731,788 instructions # 3.02 insn per cycle - 4.621932955 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -169,114 +61,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141122E-004 -Relative difference = 2.837299079287849e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.016507e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.032941e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.032941e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.058059 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,631,279,447 cycles # 2.732 GHz - 13,836,775,240 instructions # 2.46 insn per cycle - 2.062638485 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.172752e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.195878e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.195878e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.800276 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,940,734,767 cycles # 2.740 GHz - 12,518,660,568 instructions # 2.53 insn per cycle - 1.804734715 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.912888e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.925297e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.925297e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.384873 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,161,817,801 cycles # 1.743 GHz - 6,405,054,448 instructions # 1.54 insn per cycle - 2.389410885 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6266731198158101E-004 + File "", line 1 + me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index fc2e4b7aa0..9f61e25745 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ 
b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:19:45 -DATE: 2024-09-15_11:59:16 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 3.322702e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.346002e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.347615e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.517480 sec -INFO: No Floating Point Exceptions have been reported - 2,154,192,085 cycles # 2.875 GHz - 3,384,532,263 instructions # 1.57 insn per cycle - 0.808566781 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 1,706,363,197 cycles:u # 3.011 GHz (74.82%) + 3,277,719 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.88%) + 37,627,569 stalled-cycles-backend:u # 2.21% backend cycles idle (74.60%) + 2,063,772,296 instructions:u # 1.21 insn per cycle + # 0.02 stalled cycles per insn (74.16%) + 0.597226347 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.137866e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.167359e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.168584e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.158061 sec -INFO: No Floating Point Exceptions have been reported - 9,917,250,541 cycles # 2.905 GHz - 22,199,780,027 instructions # 2.24 insn per cycle - 3.469925481 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.876744e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.877706e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.877706e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.747062 sec -INFO: No Floating Point Exceptions have been reported - 25,648,612,426 cycles # 2.931 GHz - 78,952,780,288 instructions # 3.08 insn per cycle - 8.750949596 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 25,046,951,153 cycles:u # 3.416 GHz (75.01%) + 28,734,977 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.01%) + 1,094,781,598 stalled-cycles-backend:u # 4.37% backend cycles idle (75.02%) + 21,796,222,378 instructions:u # 0.87 insn per cycle + # 0.05 stalled cycles per insn (74.96%) + 7.363577246 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141133E-004 -Relative difference = 2.8372990776517314e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.516490e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.519795e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.519795e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.672214 sec -INFO: No Floating Point Exceptions have been reported - 13,064,181,413 cycles # 2.795 GHz - 39,557,975,845 instructions # 3.03 insn per cycle - 4.676162125 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141122E-004 -Relative difference = 2.837299079287849e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 
256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.035027e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.051957e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.051957e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.050050 sec -INFO: No Floating Point Exceptions have been reported - 5,620,854,320 cycles # 2.738 GHz - 13,824,518,317 instructions # 2.46 insn per cycle - 2.054209584 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.147098e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.168017e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.168017e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.801905 sec -INFO: No Floating Point Exceptions have been reported - 4,929,552,945 cycles # 2.731 GHz - 12,503,971,607 instructions # 2.54 insn per cycle - 1.805808510 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.940588e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.953237e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.953237e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.372033 sec -INFO: No Floating Point Exceptions have been reported - 4,149,036,914 cycles # 1.747 GHz - 6,390,952,192 instructions # 1.54 insn per cycle - 2.375889237 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6266731198158101E-004 + File "", line 1 + me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative 
difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 794fb1a802..5c6b4d7393 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,120 +1,53 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:19:11 -DATE: 2024-09-15_11:53:35 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.043181e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.332865e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.334802e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.523228 sec -INFO: No Floating Point Exceptions have been reported - 2,169,607,107 cycles # 2.877 GHz - 3,459,237,306 instructions # 1.59 insn per cycle - 0.815202021 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 1,706,371,987 cycles:u # 2.992 GHz (75.54%) + 3,631,847 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.97%) + 38,649,177 stalled-cycles-backend:u # 2.26% backend cycles idle (74.77%) + 2,082,455,921 instructions:u # 1.22 insn per cycle + # 0.02 stalled cycles per insn (73.61%) + 0.612529378 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.724697e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.162122e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.163349e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.220306 sec -INFO: No Floating Point Exceptions have been reported - 10,133,189,238 cycles # 2.914 GHz - 23,148,993,968 instructions # 2.28 insn per cycle - 3.534065636 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.879192e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.880098e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.880098e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.734016 sec -INFO: No Floating Point Exceptions have been reported - 25,661,507,280 cycles # 2.937 GHz - 78,953,590,713 instructions # 3.08 insn per cycle - 8.737772518 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 25,402,369,575 cycles:u # 3.420 GHz (75.04%) + 39,113,205 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.02%) + 1,097,225,273 stalled-cycles-backend:u # 4.32% backend cycles idle (74.96%) + 22,608,680,644 instructions:u # 0.89 insn per cycle + # 0.05 stalled cycles per insn (74.95%) + 7.452100580 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -122,140 +55,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141133E-004 -Relative difference = 2.8372990776517314e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.527947e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.531121e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.531121e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.656259 sec -INFO: No Floating Point Exceptions have been reported - 13,056,695,540 cycles # 2.803 GHz - 39,560,471,761 instructions # 3.03 insn per cycle - 4.660116201 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141122E-004 -Relative difference = 2.837299079287849e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 
256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.991321e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.007389e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.007389e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.059547 sec -INFO: No Floating Point Exceptions have been reported - 5,612,692,411 cycles # 2.721 GHz - 13,825,461,651 instructions # 2.46 insn per cycle - 2.063599543 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.168393e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.189788e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.189788e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.795888 sec -INFO: No Floating Point Exceptions have been reported - 4,923,088,043 cycles # 2.737 GHz - 12,506,721,234 instructions # 2.54 insn per cycle - 1.799844991 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.949785e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.962226e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.962226e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.367179 sec -INFO: No Floating Point Exceptions have been reported - 4,144,948,568 cycles # 1.750 GHz - 6,391,796,529 instructions # 1.54 insn per cycle - 2.371216811 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6266731198158101E-004 + File "", line 1 + me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative 
difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 80da0089a3..8581774aa9 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:04:02 -DATE: 2024-09-15_11:16:25 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.297695e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.326014e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.328135e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.536243 sec -INFO: No Floating Point Exceptions have been reported - 2,198,395,650 cycles # 2.855 GHz - 3,395,768,128 instructions # 1.54 insn per cycle - 0.828484590 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception + 1,563,080,932 cycles:u # 2.897 GHz (75.08%) + 2,335,713 
stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.45%) + 5,785,858 stalled-cycles-backend:u # 0.37% backend cycles idle (74.46%) + 2,027,837,198 instructions:u # 1.30 insn per cycle + # 0.00 stalled cycles per insn (74.18%) + 0.577265916 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.139738e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.170223e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.171507e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.043962 sec -INFO: No Floating Point Exceptions have been reported - 9,646,114,898 cycles # 2.920 GHz - 22,170,499,370 instructions # 2.30 insn per cycle - 3.361578134 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.884927e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 1.885834e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.885834e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.707889 sec -INFO: No Floating Point Exceptions have been reported - 25,619,332,595 cycles # 2.941 GHz - 78,702,929,908 instructions # 3.07 insn per cycle - 8.712420077 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4191) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception + 22,165,132,675 cycles:u # 3.427 GHz (75.02%) + 3,483,789 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.02%) + 7,652,025 stalled-cycles-backend:u # 0.03% backend cycles idle (75.01%) + 19,788,795,973 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (74.99%) + 6.507227844 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141133E-004 -Relative difference = 2.8372990776517314e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.574307e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.577560e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.577560e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.595188 sec -INFO: No Floating Point Exceptions have been reported - 13,048,399,086 cycles # 2.838 GHz - 39,450,691,251 instructions # 3.02 insn per cycle - 4.599210719 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12966) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141122E-004 -Relative difference = 2.837299079287849e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 7.930739e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.946422e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.946422e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.074898 sec -INFO: No Floating Point Exceptions have been reported - 5,675,338,380 cycles # 2.732 GHz - 13,910,840,784 instructions # 2.45 insn per cycle - 2.079006346 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11582) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.062486e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.083766e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.083766e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.816561 sec -INFO: No Floating Point Exceptions have been reported - 4,996,440,015 cycles # 2.746 GHz - 12,603,390,155 instructions # 2.52 insn per cycle - 1.820566072 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10423) (512y: 240) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.965367e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.977715e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.977715e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.361480 sec -INFO: No Floating Point Exceptions have been reported - 4,159,091,159 cycles # 1.759 GHz - 6,499,576,244 instructions # 1.56 insn per cycle - 2.365402468 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1751) (512y: 194) (512z: 9382) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6266731198158101E-004 + File "", line 1 + me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 
5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index a149b91e1f..2d3a62200d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:11:55 -DATE: 2024-09-15_11:36:57 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.106076e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.131267e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.133080e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.544695 sec -INFO: No Floating Point Exceptions have been reported - 2,219,428,600 cycles # 2.858 GHz - 3,493,527,234 instructions # 1.57 insn per cycle - 0.834499500 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe: Floating point exception + 1,661,905,073 cycles:u # 2.966 GHz (74.96%) + 2,555,694 
stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.68%) + 6,486,605 stalled-cycles-backend:u # 0.39% backend cycles idle (75.73%) + 2,055,254,079 instructions:u # 1.24 insn per cycle + # 0.00 stalled cycles per insn (75.84%) + 0.598193571 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.753348e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.778591e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.779655e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.312449 sec -INFO: No Floating Point Exceptions have been reported - 10,421,899,087 cycles # 2.913 GHz - 24,058,421,553 instructions # 2.31 insn per cycle - 3.636091022 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158122E-004 -Relative difference = 2.837296513854949e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.268976e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 4.269450e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.269450e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 38.423779 sec -INFO: No Floating Point Exceptions have been reported - 112,730,268,623 cycles # 2.934 GHz - 144,772,135,406 instructions # 1.28 insn per cycle - 38.427951659 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21273) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe: Floating point exception + 22,752,766,839 cycles:u # 3.426 GHz (74.95%) + 3,567,249 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.00%) + 6,939,581 stalled-cycles-backend:u # 0.03% backend cycles idle (75.07%) + 20,271,008,331 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (75.04%) + 6.678436209 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198140461E-004 -Relative difference = 2.8372991790910424e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.077363e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.079742e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.079742e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.335511 sec -INFO: No Floating Point Exceptions have been reported - 14,752,370,812 cycles # 2.763 GHz - 37,645,694,563 instructions # 2.55 insn per cycle - 5.339828429 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68253) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141209E-004 -Relative difference = 2.8372990661989057e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.332306e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.345602e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.345602e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.243210 sec -INFO: No Floating Point Exceptions have been reported - 6,130,370,628 cycles # 2.729 GHz - 13,060,931,234 instructions # 2.13 insn per cycle - 2.247428561 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46973) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.812729e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.832105e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.832105e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.867280 sec -INFO: No Floating Point Exceptions have been reported - 5,063,580,201 cycles # 2.707 GHz - 11,453,397,200 instructions # 2.26 insn per cycle - 1.871531437 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40498) (512y: 285) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.266726e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.280024e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.280024e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.263638 sec -INFO: No Floating Point Exceptions have been reported - 3,957,788,966 cycles # 1.746 GHz - 5,926,468,977 instructions # 1.50 insn per cycle - 2.267826067 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 2444) (512y: 337) (512z:39349) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6266731198158101E-004 + File "", line 1 + me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 
5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index c0add05aa1..0c0eab7b00 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:12:05 -DATE: 2024-09-15_11:38:07 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.096121e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.123086e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.125058e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.541972 sec -INFO: No Floating Point Exceptions have been reported - 2,219,963,696 cycles # 2.870 GHz - 3,470,909,979 instructions # 1.56 insn per cycle - 0.830712751 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe: Floating point exception + 1,585,284,024 cycles:u # 2.916 GHz (75.24%) + 2,383,798 
stalled-cycles-frontend:u # 0.15% frontend cycles idle (73.60%) + 5,588,685 stalled-cycles-backend:u # 0.35% backend cycles idle (74.30%) + 2,036,305,234 instructions:u # 1.28 insn per cycle + # 0.00 stalled cycles per insn (75.02%) + 0.580606077 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.756387e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.782287e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.783295e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.298693 sec -INFO: No Floating Point Exceptions have been reported - 10,399,316,447 cycles # 2.922 GHz - 23,584,057,660 instructions # 2.27 insn per cycle - 3.614068267 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158122E-004 -Relative difference = 2.837296513854949e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.224460e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 4.224899e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.224899e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 38.827526 sec -INFO: No Floating Point Exceptions have been reported - 113,783,414,735 cycles # 2.930 GHz - 144,278,309,276 instructions # 1.27 insn per cycle - 38.831628591 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21024) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe: Floating point exception + 22,138,389,204 cycles:u # 3.424 GHz (75.07%) + 3,668,578 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.13%) + 8,480,796 stalled-cycles-backend:u # 0.04% backend cycles idle (75.08%) + 19,767,092,627 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (74.88%) + 6.501829671 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198140450E-004 -Relative difference = 2.83729918072716e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.989108e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.991357e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.991357e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.492626 sec -INFO: No Floating Point Exceptions have been reported - 15,275,599,565 cycles # 2.780 GHz - 38,389,599,156 instructions # 2.51 insn per cycle - 5.496788286 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69643) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141209E-004 -Relative difference = 2.8372990661989057e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.497881e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.512338e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.512338e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.193501 sec -INFO: No Floating Point Exceptions have been reported - 6,019,122,923 cycles # 2.740 GHz - 12,933,620,431 instructions # 2.15 insn per cycle - 2.197765722 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46099) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.839318e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.859960e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.859960e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.861821 sec -INFO: No Floating Point Exceptions have been reported - 5,093,783,286 cycles # 2.731 GHz - 11,449,481,812 instructions # 2.25 insn per cycle - 1.866150033 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40142) (512y: 219) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.279822e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.293417e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.293417e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.259516 sec -INFO: No Floating Point Exceptions have been reported - 3,958,337,222 cycles # 1.750 GHz - 5,889,113,860 instructions # 1.49 insn per cycle - 2.263750575 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1959) (512y: 259) (512z:38927) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6266731198158101E-004 + File "", line 1 + me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 
5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index f53bdfcb06..751330cae0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:04:11 -DATE: 2024-09-15_11:16:59 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.467249e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.509285e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.513718e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.494218 sec -INFO: No Floating Point Exceptions have been reported - 2,034,103,432 cycles # 2.860 GHz - 3,045,186,386 instructions # 1.50 insn per cycle - 0.768364063 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 1,418,805,040 cycles:u # 2.875 GHz (75.49%) + 2,313,274 
stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.64%) + 6,853,709 stalled-cycles-backend:u # 0.48% backend cycles idle (74.21%) + 1,875,134,710 instructions:u # 1.32 insn per cycle + # 0.00 stalled cycles per insn (74.32%) + 0.530722972 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.128844e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.190571e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.193248e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.796967 sec -INFO: No Floating Point Exceptions have been reported - 5,948,917,067 cycles # 2.924 GHz - 12,254,957,631 instructions # 2.06 insn per cycle - 2.089755272 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.939424e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.940380e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.940380e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.461205 sec -INFO: No Floating Point Exceptions have been reported - 24,939,277,475 cycles # 2.947 GHz - 79,109,068,255 instructions # 3.17 insn per cycle - 8.465315543 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 13,382,958,070 cycles:u # 3.382 GHz (75.13%) + 4,088,491 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.06%) + 6,955,897 stalled-cycles-backend:u # 0.05% backend cycles idle (74.90%) + 12,294,040,023 instructions:u # 0.92 insn per cycle + # 0.00 stalled cycles per insn (74.84%) + 3.993439825 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.989306e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.001573e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.001573e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.351266 sec -INFO: No Floating Point Exceptions have been reported - 6,525,064,847 cycles # 2.771 GHz - 20,269,487,959 instructions # 3.11 insn per cycle - 2.355049106 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.582613e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.589051e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.589051e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.041194 sec -INFO: No Floating Point Exceptions have been reported - 2,848,829,047 cycles # 2.729 GHz - 7,065,493,216 instructions # 2.48 insn per cycle - 1.044894531 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.794003e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.802231e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.802231e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.919231 sec -INFO: No Floating Point Exceptions have been reported - 2,522,001,135 cycles # 2.735 GHz - 6,403,495,458 instructions # 2.54 insn per cycle - 0.923373159 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.403418e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.408437e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.408437e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.173408 sec -INFO: No Floating Point Exceptions have been reported - 2,065,585,282 cycles # 1.756 GHz - 3,303,212,083 instructions # 1.60 insn per cycle - 1.177101647 seconds time elapsed -=Symbols 
in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6271025600481842E-004 + File "", line 1 + me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 
5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 99ccf0b7c6..1eefdd2ba8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,167 +1,59 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:17:40 -DATE: 2024-09-15_11:48:48 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.945945e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.468849e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.468849e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.482042 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,997,062,353 cycles # 2.864 GHz - 3,031,546,242 instructions # 1.52 insn per cycle - 0.755384112 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 1,499,735,570 cycles:u # 2.874 GHz (75.92%) + 2,905,221 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.49%) + 71,774,380 stalled-cycles-backend:u # 4.79% backend cycles idle (75.06%) + 1,842,322,240 instructions:u # 1.23 insn per cycle + # 0.04 stalled cycles per insn (74.87%) + 0.553912247 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.954589e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.017029e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.017029e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 -TOTAL : 2.150287 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,911,046,669 cycles # 2.894 GHz - 9,720,301,924 instructions # 1.41 insn per cycle - 2.447262326 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.933142e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.934088e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.934088e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.491300 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 24,914,436,852 cycles # 2.933 GHz - 79,112,976,787 instructions # 3.18 insn per cycle - 8.495346137 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 18,087,443,953 cycles:u # 3.395 GHz (74.95%) + 30,366,376 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.02%) + 2,148,114,724 stalled-cycles-backend:u # 11.88% backend cycles idle (75.08%) + 14,456,886,209 instructions:u # 0.80 insn per cycle + # 0.15 stalled cycles per insn (75.03%) + 5.362091783 seconds time elapsed ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.985457e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.998623e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.998623e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.355396 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,536,388,186 cycles # 2.771 GHz - 20,278,657,318 instructions # 3.10 insn per cycle - 2.359340287 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -169,114 +61,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.588469e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.595113e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.595113e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.040170 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,854,653,705 cycles # 2.736 GHz - 7,075,192,119 instructions # 2.48 insn per cycle - 1.044116961 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.764583e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.772721e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.772721e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.937436 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,529,004,808 cycles # 2.688 GHz - 6,413,196,189 instructions # 2.54 insn per cycle - 0.941494819 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.393518e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.398724e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.398724e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.184860 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,077,752,907 cycles # 1.749 GHz - 3,313,647,639 instructions # 1.59 insn per cycle - 1.188846310 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6271025600481842E-004 + File "", line 1 + me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 19f64c3e7a..49d6289b93 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ 
b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:19:55 -DATE: 2024-09-15_11:59:51 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] 
(23) = ( 5.517186e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.553165e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.556664e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.159396e-01 +- 3.238803e-01 ) GeV^-4 -TOTAL : 0.477606 sec -INFO: No Floating Point Exceptions have been reported - 1,989,569,741 cycles # 2.873 GHz - 3,005,042,417 instructions # 1.51 insn per cycle - 0.749585148 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 1,483,123,567 cycles:u # 2.868 GHz (74.57%) + 2,717,393 stalled-cycles-frontend:u # 0.18% frontend cycles idle (73.89%) + 71,784,445 stalled-cycles-backend:u # 4.84% backend cycles idle (74.13%) + 1,843,882,297 instructions:u # 1.24 insn per cycle + # 0.04 stalled cycles per insn (74.27%) + 0.564320838 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.132207e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.190283e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.192869e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.892599 sec -INFO: No Floating Point Exceptions have been reported - 6,180,255,032 cycles # 2.913 GHz - 13,158,154,431 instructions # 2.13 insn per cycle - 2.179693271 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.919400e+03 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.920348e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.920348e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.550896 sec -INFO: No Floating Point Exceptions have been reported - 24,917,761,266 cycles # 2.921 GHz - 79,107,928,249 instructions # 3.17 insn per cycle - 8.554412617 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 17,890,534,241 cycles:u # 3.412 GHz (74.94%) + 20,006,662 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.93%) + 2,147,565,042 stalled-cycles-backend:u # 12.00% backend cycles idle (74.98%) + 13,812,088,209 instructions:u # 0.77 insn per cycle + # 0.16 stalled cycles per insn (74.97%) + 5.275581467 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.947395e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.959971e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.959971e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.367170 sec -INFO: No Floating Point Exceptions have been reported - 6,537,639,637 cycles # 2.759 GHz - 20,270,199,231 instructions # 3.10 insn per cycle - 2.370801699 seconds 
time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.589719e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.596352e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.596352e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 1.038242 sec -INFO: No Floating Point Exceptions have been reported - 2,854,773,942 cycles # 2.742 GHz - 7,065,309,093 instructions # 2.47 insn per cycle - 1.041774606 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.793297e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.801693e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.801693e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.921472 sec -INFO: No Floating Point Exceptions have been reported - 2,523,779,273 cycles # 2.730 GHz - 6,401,399,707 instructions # 2.54 insn per cycle - 0.925110369 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.398357e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.403401e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.403401e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.179617 sec -INFO: No Floating Point Exceptions have been reported - 2,071,965,297 cycles # 1.751 GHz - 3,301,502,867 instructions # 1.59 insn per cycle - 1.184374263 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6271025600481842E-004 + File "", line 1 + me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative 
difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index dd6ac10521..58b75480e2 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,120 +1,53 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:19:22 -DATE: 2024-09-15_11:54:10 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.065242e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.543001e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.546426e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.481262 sec -INFO: No Floating Point Exceptions have been reported - 1,996,897,335 cycles # 2.872 GHz - 2,970,634,149 instructions # 1.49 insn per cycle - 0.754034663 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 1,494,375,955 cycles:u # 2.883 GHz (75.37%) + 2,853,013 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.32%) + 71,723,714 stalled-cycles-backend:u # 4.80% backend cycles idle (75.23%) + 1,837,402,005 instructions:u # 1.23 insn per cycle + # 0.04 stalled cycles per insn (74.64%) + 0.544142086 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.141287e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.200996e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.203446e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 -TOTAL : 1.902964 sec -INFO: No Floating Point Exceptions have been reported - 6,190,673,302 cycles # 2.901 GHz - 13,306,269,630 instructions # 2.15 insn per cycle - 2.189368892 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.933487e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.934407e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.934407e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.487231 sec -INFO: No Floating Point Exceptions have been reported - 24,900,546,223 cycles # 2.933 GHz - 79,107,234,987 instructions # 3.18 insn per cycle - 8.490956598 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 18,159,840,783 cycles:u # 3.407 GHz (75.00%) + 30,335,851 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.09%) + 2,143,390,625 stalled-cycles-backend:u # 11.80% backend cycles idle (74.95%) + 14,514,491,057 instructions:u # 0.80 insn per cycle + # 0.15 stalled cycles per insn (74.85%) + 5.356682373 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -122,140 +55,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.983704e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.996217e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.996217e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.353085 sec -INFO: No Floating Point Exceptions have been reported - 6,541,995,614 cycles # 2.777 GHz - 20,269,407,860 instructions # 3.10 insn per cycle - 2.356873297 seconds 
time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.585450e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.591953e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.591953e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.039372 sec -INFO: No Floating Point Exceptions have been reported - 2,850,375,088 cycles # 2.735 GHz - 7,065,899,998 instructions # 2.48 insn per cycle - 1.043028953 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.792341e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.800787e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.800787e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.920003 sec -INFO: No Floating Point Exceptions have been reported - 2,517,551,147 cycles # 2.728 GHz - 6,403,207,803 instructions # 2.54 insn per cycle - 0.923687532 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.402198e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.407285e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.407285e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.174725 sec -INFO: No Floating Point Exceptions have been reported - 2,068,104,176 cycles # 1.756 GHz - 3,303,725,822 instructions # 1.60 insn per cycle - 1.178407380 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6271025600481842E-004 + File "", line 1 + me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative 
difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index 0807d31ee5..c40d39c4c8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:04:18 -DATE: 2024-09-15_11:17:26 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.482391e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.527725e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.532031e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.493760 sec -INFO: No Floating Point Exceptions have been reported - 2,047,228,800 cycles # 2.877 GHz - 3,039,242,832 instructions # 1.48 insn per cycle - 0.768472979 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception + 1,363,608,148 cycles:u # 2.811 GHz (74.70%) + 2,330,167 
stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.97%) + 8,399,284 stalled-cycles-backend:u # 0.62% backend cycles idle (75.57%) + 1,862,082,973 instructions:u # 1.37 insn per cycle + # 0.00 stalled cycles per insn (75.68%) + 0.521364052 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.102852e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.164567e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.167207e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.802499 sec -INFO: No Floating Point Exceptions have been reported - 5,908,213,594 cycles # 2.909 GHz - 12,456,477,911 instructions # 2.11 insn per cycle - 2.093284072 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.936218e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
1.937180e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.937180e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.475402 sec -INFO: No Floating Point Exceptions have been reported - 24,949,332,764 cycles # 2.943 GHz - 78,839,555,653 instructions # 3.16 insn per cycle - 8.479529977 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception + 13,402,881,221 cycles:u # 3.400 GHz (75.04%) + 3,099,577 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.04%) + 6,402,571 stalled-cycles-backend:u # 0.05% backend cycles idle (75.14%) + 12,222,937,447 instructions:u # 0.91 insn per cycle + # 0.00 stalled cycles per insn (75.23%) + 3.977923159 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866250177339E-004 -Relative difference = 5.65798569465384e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.122699e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.135567e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.135567e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.306947 sec -INFO: No Floating Point Exceptions have been reported - 6,466,639,499 cycles # 2.800 GHz - 20,230,851,658 instructions # 3.13 insn per cycle - 2.310638118 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13491) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861448331612E-004 -Relative difference = 2.1853408865157068e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.507818e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.513887e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.513887e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.092466 sec -INFO: No Floating Point Exceptions have been reported - 2,980,915,950 cycles # 2.722 GHz - 7,206,628,057 instructions # 2.42 insn per cycle - 1.096222389 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12437) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271939668088170E-004 -Relative difference = 5.008331292535666e-09 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.724603e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.732183e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.732183e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.955874 sec -INFO: No Floating Point Exceptions have been reported - 2,613,667,112 cycles # 2.726 GHz - 6,544,516,026 instructions # 2.50 insn per cycle - 0.959652526 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11449) (512y: 26) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271939668088170E-004 -Relative difference = 5.008331292535666e-09 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.352025e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.356715e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.356715e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.218129 sec -INFO: No Floating Point Exceptions have been reported - 2,137,040,914 cycles # 1.750 GHz - 3,460,849,319 instructions # 1.62 insn per cycle - 1.221974093 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 3037) (512y: 25) (512z: 9677) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952032316561E-004 -Relative difference = 3.066631594207157e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6271025600481842E-004 + File "", line 1 + me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 
5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 507a64eed8..9be7bf54b0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:12:14 -DATE: 2024-09-15_11:39:16 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.567838e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.605874e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.609566e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.496412 sec -INFO: No Floating Point Exceptions have been reported - 2,051,587,227 cycles # 2.873 GHz - 3,025,794,403 instructions # 1.47 insn per cycle - 0.774558823 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe: Floating point exception + 1,375,130,992 cycles:u # 2.798 GHz (75.90%) + 2,261,311 
stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.61%) + 11,313,852 stalled-cycles-backend:u # 0.82% backend cycles idle (75.57%) + 1,856,959,657 instructions:u # 1.35 insn per cycle + # 0.01 stalled cycles per insn (75.25%) + 0.529727587 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.651284e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.721094e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.724249e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.731680 sec -INFO: No Floating Point Exceptions have been reported - 5,770,677,421 cycles # 2.916 GHz - 12,010,283,008 instructions # 2.08 insn per cycle - 2.035197700 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262669162351490E-004 -Relative difference = 2.8232862531213374e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.459828e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 5.460600e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.460600e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 30.042280 sec -INFO: No Floating Point Exceptions have been reported - 86,122,252,676 cycles # 2.867 GHz - 135,657,307,138 instructions # 1.58 insn per cycle - 30.046456599 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15856) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe: Floating point exception + 13,458,291,676 cycles:u # 3.396 GHz (75.04%) + 3,048,501 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.07%) + 7,478,777 stalled-cycles-backend:u # 0.06% backend cycles idle (75.03%) + 12,292,736,463 instructions:u # 0.91 insn per cycle + # 0.00 stalled cycles per insn (74.94%) + 3.997543610 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275349717465765E-004 -Relative difference = 4.26303654465793e-09 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.672428e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.686393e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.686393e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.462988 sec -INFO: No Floating Point Exceptions have been reported - 6,758,193,786 cycles # 2.742 GHz - 19,357,772,182 instructions # 2.86 insn per cycle - 2.467248153 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69591) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274862748188362E-004 -Relative difference = 4.14665283800746e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.362305e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.367046e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.367046e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.208685 sec -INFO: No Floating Point Exceptions have been reported - 3,166,621,827 cycles # 2.612 GHz - 6,792,444,940 instructions # 2.15 insn per cycle - 1.212802697 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49012) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731568543797E-004 -Relative difference = 2.3668012430631962e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.652877e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.659885e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.659885e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 0.997146 sec -INFO: No Floating Point Exceptions have been reported - 2,625,468,482 cycles # 2.624 GHz - 5,970,509,824 instructions # 2.27 insn per cycle - 1.001249505 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42601) (512y: 11) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731568543797E-004 -Relative difference = 2.3668012430631962e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.322992e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.327409e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.327409e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.244528 sec -INFO: No Floating Point Exceptions have been reported - 2,076,691,772 cycles # 1.664 GHz - 3,494,505,327 instructions # 1.68 insn per cycle - 1.248709350 seconds time elapsed -=Symbols 
in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5207) (512y: 3) (512z:44836) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627275e-04 -Avg ME (F77/C++) = 6.6272750237027223E-004 -Relative difference = 3.5765412974815996e-09 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6271025600481842E-004 + File "", line 1 + me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 
5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index c027e74779..1bb36dc113 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:12:21 -DATE: 2024-09-15_11:40:08 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.562972e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.598287e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.601913e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.492421 sec -INFO: No Floating Point Exceptions have been reported - 2,010,223,902 cycles # 2.826 GHz - 3,031,193,233 instructions # 1.51 insn per cycle - 0.770287796 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe: Floating point exception + 1,385,776,268 cycles:u # 2.833 GHz (76.08%) + 2,351,309 
stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.66%) + 7,512,705 stalled-cycles-backend:u # 0.54% backend cycles idle (74.89%) + 1,806,674,893 instructions:u # 1.30 insn per cycle + # 0.00 stalled cycles per insn (74.70%) + 0.525743522 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.689601e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.749985e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.752940e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.732807 sec -INFO: No Floating Point Exceptions have been reported - 5,640,346,322 cycles # 2.872 GHz - 11,210,275,869 instructions # 1.99 insn per cycle - 2.022037581 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262669162351490E-004 -Relative difference = 2.8232862531213374e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.446094e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 5.446837e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.446837e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 30.118401 sec -INFO: No Floating Point Exceptions have been reported - 86,113,084,692 cycles # 2.859 GHz - 135,363,065,912 instructions # 1.57 insn per cycle - 30.122446956 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15471) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe: Floating point exception + 13,316,566,072 cycles:u # 3.391 GHz (75.04%) + 3,187,894 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.16%) + 6,509,743 stalled-cycles-backend:u # 0.05% backend cycles idle (75.12%) + 12,143,279,403 instructions:u # 0.91 insn per cycle + # 0.00 stalled cycles per insn (74.90%) + 3.961298270 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275349662128086E-004 -Relative difference = 5.098002770919431e-09 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.516652e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.527742e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.527742e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.521798 sec -INFO: No Floating Point Exceptions have been reported - 6,856,870,344 cycles # 2.715 GHz - 19,407,796,379 instructions # 2.83 insn per cycle - 2.529187527 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69622) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274862799683282E-004 -Relative difference = 4.2243518621014775e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.378784e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.383778e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.383778e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.194647 sec -INFO: No Floating Point Exceptions have been reported - 3,106,911,149 cycles # 2.593 GHz - 6,716,375,817 instructions # 2.16 insn per cycle - 1.199018593 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47699) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731623419345E-004 -Relative difference = 2.449603850635964e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.633831e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.642301e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.642301e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.009236 sec -INFO: No Floating Point Exceptions have been reported - 2,628,290,758 cycles # 2.601 GHz - 5,969,462,739 instructions # 2.27 insn per cycle - 1.017917591 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41882) (512y: 13) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731623419345E-004 -Relative difference = 2.449603850635964e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.325974e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.330533e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.330533e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.242092 sec -INFO: No Floating Point Exceptions have been reported - 2,077,381,824 cycles # 1.674 GHz - 3,490,865,426 instructions # 1.68 insn per cycle - 1.248861709 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 4171) (512y: 4) (512z:44487) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627275e-04 -Avg ME (F77/C++) = 6.6272750247886592E-004 -Relative difference = 3.740400032174438e-09 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 6.6271025600481842E-004 + File "", line 1 + me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 
5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index f1d40dff2c..f0a09b8c50 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,83 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:04:25 -DATE: 2024-09-15_11:17:52 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.307071e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.337162e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.339154e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.532508 sec -INFO: No Floating Point Exceptions have been reported - 2,203,526,312 cycles # 2.869 GHz - 3,467,986,959 instructions # 1.57 insn per cycle - 0.824379177 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception + 1,687,819,594 cycles:u # 3.013 GHz (74.16%) + 2,553,938 
stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.50%) + 7,130,585 stalled-cycles-backend:u # 0.42% backend cycles idle (75.69%) + 1,978,130,748 instructions:u # 1.17 insn per cycle + # 0.00 stalled cycles per insn (75.74%) + 0.597897044 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.133497e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.164330e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.165560e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.045403 sec -INFO: No Floating Point Exceptions have been reported - 9,651,769,916 cycles # 2.918 GHz - 21,560,396,285 instructions # 2.23 insn per cycle - 3.363407224 seconds time elapsed +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception + 22,883,618,037 cycles:u # 3.431 GHz (74.93%) + 3,644,122 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.94%) + 6,582,753 stalled-cycles-backend:u # 0.03% backend cycles idle (74.98%) + 20,394,827,984 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (75.02%) + 6.706756716 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,174 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = Avg ME (F77/GPU) = 6.6266732376103494E-004 -Relative difference = 2.659538381540814e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.856718e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.857596e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.857596e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.839571 sec -INFO: No Floating Point Exceptions have been reported - 25,916,613,183 cycles # 2.931 GHz - 79,423,792,934 instructions # 3.06 insn per cycle - 8.843857471 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4775) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731406016235E-004 -Relative difference = 2.8059296349552523e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.495399e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.498545e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.498545e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.699126 sec -INFO: No Floating Point Exceptions have been reported - 12,847,395,150 cycles # 2.733 GHz - 38,826,102,030 instructions # 3.02 insn per cycle - 4.703180057 seconds time elapsed -=Symbols 
in CPPProcess_cpp.o= (~sse4:13173) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730246908442E-004 -Relative difference = 2.98084507782618e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] 
(23) = ( 8.037182e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.053225e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.053225e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.047520 sec -INFO: No Floating Point Exceptions have been reported - 5,598,661,180 cycles # 2.730 GHz - 13,618,631,873 instructions # 2.43 insn per cycle - 2.051512013 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11427) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 
1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.221665e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.243251e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.243251e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.785642 sec -INFO: No Floating Point Exceptions have been reported - 4,865,374,839 cycles # 2.720 GHz - 12,297,660,832 instructions # 2.53 insn per cycle - 1.789585857 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10331) (512y: 79) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.872514e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.884400e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.884400e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.393483 sec -INFO: No Floating Point Exceptions have been reported - 4,171,721,525 cycles # 1.741 GHz - 6,391,185,056 instructions # 1.53 insn per cycle - 2.397568985 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1980) (512y: 93) (512z: 9360) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + File "", line 1 + me1=; me2=6.6266732376103494E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index ae3635632d..8dc3670619 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ 
b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,83 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-17_09:04:35 -DATE: 2024-09-15_11:18:27 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 
3.320961e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.350573e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.352533e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.532217 sec -INFO: No Floating Point Exceptions have been reported - 2,207,479,579 cycles # 2.873 GHz - 3,464,148,832 instructions # 1.57 insn per cycle - 0.824297603 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception + 1,602,679,069 cycles:u # 2.961 GHz (75.53%) + 2,397,976 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.22%) + 6,616,527 stalled-cycles-backend:u # 0.41% backend cycles idle (75.57%) + 2,010,182,080 instructions:u # 1.25 insn per cycle + # 0.00 stalled cycles per insn (74.57%) + 0.577145300 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.148990e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.180422e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.181658e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.035739 sec -INFO: No Floating Point Exceptions have been reported - 9,612,803,881 cycles # 2.915 GHz - 20,074,302,744 instructions # 2.09 insn per cycle - 3.353532451 seconds time elapsed +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception + 22,138,381,754 cycles:u # 3.427 GHz (74.99%) + 3,614,153 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.99%) + 7,061,353 stalled-cycles-backend:u # 0.03% backend cycles idle (75.06%) + 19,722,872,230 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (75.10%) + 6.495291149 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,174 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626675e-04 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = Avg ME (F77/GPU) = 6.6266732376103494E-004 -Relative difference = 2.659538381540814e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.831511e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.832364e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.832364e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.961804 sec -INFO: No Floating Point Exceptions have been reported - 26,010,493,082 cycles # 2.902 GHz - 79,449,384,960 instructions # 3.05 insn per cycle - 8.965752302 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4431) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731406016235E-004 -Relative difference = 2.8059296349552523e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.477024e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.480127e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.480127e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.723753 sec -INFO: No Floating Point Exceptions have been reported - 12,826,084,303 cycles # 2.714 GHz - 38,778,289,694 instructions # 3.02 insn per cycle - 4.727826379 seconds time elapsed -=Symbols 
in CPPProcess_cpp.o= (~sse4:12935) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730246908442E-004 -Relative difference = 2.98084507782618e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] 
(23) = ( 8.051767e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.067821e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.067821e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.043694 sec -INFO: No Floating Point Exceptions have been reported - 5,591,778,218 cycles # 2.733 GHz - 13,733,552,430 instructions # 2.46 insn per cycle - 2.047665232 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11510) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 
1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.123898e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.144357e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.144357e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.804403 sec -INFO: No Floating Point Exceptions have been reported - 4,951,573,094 cycles # 2.739 GHz - 12,422,632,916 instructions # 2.51 insn per cycle - 1.808331695 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10322) (512y: 239) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.884699e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.896731e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.896731e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.389141 sec -INFO: No Floating Point Exceptions have been reported - 4,181,828,175 cycles # 1.750 GHz - 6,496,177,989 instructions # 1.55 insn per cycle - 2.393377398 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1803) (512y: 191) (512z: 9369) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + File "", line 1 + me1=; me2=6.6266732376103494E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 407fbbe6c0..8256ec0032 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-15_11:20:21 +DATE: 2024-09-17_09:05:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.059284e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.059685e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.059813e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.451621 sec -INFO: No Floating Point Exceptions have been reported - 8,080,887,114 cycles # 2.907 GHz - 16,734,437,330 instructions # 2.07 insn per 
cycle - 2.836211679 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.254596e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.256737e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.256950e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.020216 sec -INFO: No Floating Point Exceptions have been reported - 12,719,492,672 cycles # 2.923 GHz - 29,448,097,640 instructions # 2.32 insn per cycle - 4.407436029 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.610103e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.610301e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.610301e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.935255 sec -INFO: No Floating Point Exceptions have been reported - 18,974,774,871 cycles # 2.735 GHz - 53,899,721,094 instructions # 2.84 insn per cycle - 6.939338261 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.193686e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.193730e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.193730e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.437795 sec +INFO: No Floating Point Exceptions have been reported + 15,506,555,900 cycles:u # 3.500 GHz (74.95%) + 10,366,372 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.90%) + 1,555,120,464 stalled-cycles-backend:u # 10.03% backend cycles idle (74.93%) + 53,488,558,564 instructions:u # 3.45 insn per cycle + # 0.03 stalled cycles per insn (75.02%) + 4.444923149 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44571) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.579226e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.579318e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.579318e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.345304 sec -INFO: No Floating Point Exceptions have been reported - 9,800,813,517 cycles # 2.927 GHz - 27,149,189,789 instructions # 2.77 insn per cycle - 3.349514409 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.301383e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.301519e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.301519e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.296195 sec +INFO: No Floating Point Exceptions have been reported + 8,049,162,022 cycles:u # 3.501 GHz (75.07%) + 39,350,066 stalled-cycles-frontend:u # 0.49% frontend cycles idle (74.97%) + 775,648,333 stalled-cycles-backend:u # 9.64% backend cycles idle (74.95%) + 27,055,593,072 instructions:u # 3.36 insn per cycle + # 0.03 stalled cycles per insn (74.95%) + 2.303653868 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.366336e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.366803e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.366803e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.570292 sec -INFO: No Floating Point Exceptions have been reported - 4,287,053,926 cycles # 2.724 GHz - 9,590,127,631 instructions # 2.24 insn per cycle - 1.574599019 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.163026e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.163527e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.163527e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.024913 sec +INFO: No Floating Point Exceptions have been reported + 3,595,500,755 cycles:u # 3.497 GHz (74.89%) + 1,581,611 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.11%) + 279,683,618 stalled-cycles-backend:u # 7.78% backend cycles idle (75.10%) + 9,554,292,818 instructions:u # 2.66 insn per cycle + # 0.03 stalled cycles per insn (75.10%) + 1.042223630 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83781) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 +Avg ME (F77/C++) = 9.8722595285459444E-003 +Relative difference = 3.5163711246052657e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] 
(23) = ( 3.904765e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.905290e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.905290e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.353979 sec -INFO: No Floating Point Exceptions have been reported - 3,709,436,689 cycles # 2.733 GHz - 8,514,247,183 instructions # 2.30 insn per cycle - 1.357880276 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 89) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.407683e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.408196e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.408196e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.552623 sec -INFO: No Floating Point Exceptions have been reported - 2,699,560,921 cycles # 1.736 GHz - 4,280,862,154 instructions # 1.59 insn per cycle - 1.556608026 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2853) (512y: 103) (512z:79114) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index e032151033..f29745a6a4 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building 
in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,96 +19,35 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-15_11:49:15 +DATE: 2024-09-17_09:17:48 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.054597e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.057500e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.057500e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.436820 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 8,044,156,582 cycles # 2.913 GHz - 18,167,469,518 instructions # 2.26 insn per cycle - 2.819990438 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.188185e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.221546e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.221546e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.024442 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 12,704,137,155 cycles # 2.918 GHz - 27,467,799,669 instructions # 2.16 insn per cycle - 4.411963692 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.352982e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.353176e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.353176e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.182913 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 19,533,134,643 cycles # 2.719 GHz - 53,904,822,620 instructions # 2.76 insn per cycle - 7.186820393 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.208280e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.208318e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.208318e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.374666 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 15,322,456,911 cycles:u # 3.500 GHz (74.97%) + 7,524,722 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.96%) + 1,567,010,890 stalled-cycles-backend:u # 10.23% backend cycles idle (74.96%) + 53,484,034,623 instructions:u # 3.49 insn per cycle + # 0.03 stalled cycles per insn (74.97%) + 4.382022884 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44571) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -116,33 +55,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.583220e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.583307e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.583307e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.337581 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 9,779,129,351 cycles # 2.927 GHz - 27,151,664,900 instructions # 2.78 insn per cycle - 3.341583664 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.274898e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.275032e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.275032e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.321252 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 8,133,520,976 cycles:u # 3.500 GHz (74.90%) + 79,472,583 stalled-cycles-frontend:u # 0.98% frontend cycles idle (74.87%) + 864,701,293 stalled-cycles-backend:u # 10.63% backend cycles idle (74.87%) + 27,087,071,552 instructions:u # 3.33 insn per cycle + # 0.03 stalled cycles per insn (74.90%) + 2.328456266 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -150,33 +92,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.365450e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.365854e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.365854e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.570834 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,275,074,655 cycles # 2.716 GHz - 9,592,294,661 instructions # 2.24 insn per cycle - 1.574792391 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.071395e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.071828e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.071828e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.042998 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,656,725,861 cycles:u # 3.496 GHz (74.85%) + 23,585,030 stalled-cycles-frontend:u # 0.64% frontend cycles idle (74.77%) + 308,692,447 stalled-cycles-backend:u # 8.44% backend cycles idle (74.76%) + 9,557,086,165 instructions:u # 2.61 insn per cycle + # 0.03 stalled cycles per insn (74.77%) + 1.051124543 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2:83781) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -184,80 +129,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 +Avg ME (F77/C++) = 9.8722595285459444E-003 +Relative difference = 3.5163711246052657e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.876513e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.877048e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.877048e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.364090 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,720,013,902 cycles # 2.721 GHz - 8,517,094,572 instructions # 2.29 insn per cycle - 1.368386654 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 89) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.420108e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.420617e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.420617e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.546895 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,698,104,238 cycles # 1.741 GHz - 4,283,566,876 instructions # 1.59 insn per cycle - 1.551097954 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2853) (512y: 103) (512z:79114) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 71b1803a4d..4195920c5e 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-15_11:21:49 +DATE: 2024-09-17_09:06:00 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.055075e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.055529e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.055650e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.454190 sec -INFO: No Floating Point Exceptions have been reported - 8,101,270,896 cycles # 2.912 GHz - 18,320,414,768 instructions # 2.26 insn per cycle - 2.837550341 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.224205e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.226444e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.226692e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.029157 sec -INFO: No Floating Point Exceptions have been reported - 12,695,828,795 cycles # 2.910 GHz - 28,709,503,011 instructions # 2.26 insn per cycle - 4.420420636 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.201824e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.202080e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.202080e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.438167 sec -INFO: No Floating Point Exceptions have been reported - 18,843,132,149 cycles # 2.926 GHz - 53,928,570,497 instructions # 2.86 insn per cycle - 6.442267111 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32022) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.189589e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.189626e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.189626e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.440884 sec +INFO: No Floating Point Exceptions have been reported + 15,571,050,388 cycles:u # 3.504 GHz (75.05%) + 4,077,492 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.99%) + 1,637,764,291 stalled-cycles-backend:u # 10.52% backend cycles idle (74.98%) + 53,466,110,925 instructions:u # 3.43 insn per cycle + # 0.03 stalled cycles per insn (74.98%) + 4.448720346 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44484) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow 
summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.562611e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.562704e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.562704e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.380180 sec -INFO: No Floating Point Exceptions have been reported - 9,918,861,148 cycles # 2.932 GHz - 27,128,280,341 instructions # 2.74 insn per cycle - 3.383996000 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96368) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.350282e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.350421e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.350421e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.248138 sec +INFO: No Floating Point Exceptions have been reported + 7,882,038,590 cycles:u # 3.502 GHz (74.88%) + 1,475,397 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.78%) + 752,175,610 stalled-cycles-backend:u # 9.54% backend cycles idle (74.88%) + 27,060,670,580 instructions:u # 3.43 insn per cycle + # 0.03 stalled cycles per insn (75.06%) + 2.255285871 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95581) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow 
summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.368711e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.369114e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.369114e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.568940 sec -INFO: No Floating Point Exceptions have been reported - 4,289,720,535 cycles # 2.728 GHz - 9,584,928,513 instructions # 2.23 insn per cycle - 1.573132113 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84968) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.109296e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.109761e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.109761e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.034695 sec +INFO: No Floating Point Exceptions have been reported + 3,617,489,803 cycles:u # 3.487 GHz (74.56%) + 24,486,372 stalled-cycles-frontend:u # 0.68% frontend cycles idle (74.80%) + 320,757,347 stalled-cycles-backend:u # 8.87% backend cycles idle (75.19%) + 9,566,966,571 instructions:u # 2.64 insn per cycle + # 0.03 stalled cycles per insn (75.33%) + 1.042060292 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83752) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 +Avg ME (F77/C++) = 9.8722595285459444E-003 +Relative difference = 3.5163711246052657e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] 
(23) = ( 3.874256e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.874798e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.874798e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.364932 sec -INFO: No Floating Point Exceptions have been reported - 3,728,944,037 cycles # 2.726 GHz - 8,507,330,131 instructions # 2.28 insn per cycle - 1.368786926 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80632) (512y: 239) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.414224e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.414743e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.414743e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.549013 sec -INFO: No Floating Point Exceptions have been reported - 2,698,122,905 cycles # 1.738 GHz - 4,280,648,246 instructions # 1.59 insn per cycle - 1.553090413 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 185) (512z:79098) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 26694465db..94035a1c5a 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-15_11:23:17 +DATE: 2024-09-17_09:06:37 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.208704e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.209632e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.209859e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.761935 sec -INFO: No Floating Point Exceptions have been reported - 5,908,004,381 cycles # 2.901 GHz - 11,686,361,328 instructions # 1.98 insn per cycle - 2.093948305 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
-......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.102338e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.102897e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.103014e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 -TOTAL : 2.075560 sec -INFO: No Floating Point Exceptions have been reported - 6,795,219,354 cycles # 2.902 GHz - 14,967,758,240 instructions # 2.20 insn per cycle - 2.398428041 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849635e-03 -Avg ME (F77/GPU) = 9.8712451931260159E-003 -Relative difference = 0.0021940095370046923 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 
13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.563117e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.563375e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.563375e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.168224 sec -INFO: No Floating Point Exceptions have been reported - 18,106,019,929 cycles # 2.934 GHz - 53,907,716,361 instructions # 2.98 insn per cycle - 6.172403776 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.098477e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.098501e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.098501e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 +TOTAL : 4.808590 sec +INFO: No Floating Point Exceptions have been reported + 16,860,952,832 cycles:u # 3.505 GHz (74.91%) + 104,207,065 stalled-cycles-frontend:u # 0.62% frontend cycles idle (74.91%) + 1,799,771,021 stalled-cycles-backend:u # 10.67% backend cycles idle (74.99%) + 54,159,229,986 instructions:u # 3.21 insn per cycle + # 0.03 stalled cycles per insn (75.06%) + 4.816228082 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:33073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087551509E-003 -Relative difference = 2.119780432912131e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855168e-03 +Avg ME (F77/C++) = 9.8551676614203575E-003 +Relative difference = 3.4355542366580335e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.366569e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.366962e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.366962e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.570259 sec -INFO: No Floating Point Exceptions have been reported - 4,597,646,888 cycles # 2.923 GHz - 13,807,163,752 instructions # 3.00 insn per cycle - 1.574045592 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.942387e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.942898e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.942898e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 +TOTAL : 1.070162 sec +INFO: No Floating Point Exceptions have been reported + 3,739,406,171 cycles:u # 3.485 GHz (74.65%) + 654,362 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.56%) + 370,462,939 stalled-cycles-backend:u # 9.91% backend cycles idle (74.94%) + 13,768,829,664 instructions:u # 3.68 insn per cycle + # 0.03 stalled cycles per insn (75.40%) + 1.077878306 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95933) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896367235E-003 -Relative difference = 3.1515505172940424e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855164e-03 +Avg ME (F77/C++) = 9.8551639361110794E-003 +Relative difference = 6.48278610035626e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.801272e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.802916e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.802916e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.778379 sec -INFO: No Floating Point Exceptions have been reported - 2,130,043,758 cycles # 2.726 GHz - 4,836,599,174 instructions # 2.27 insn per cycle - 0.782206721 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.010053e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.010205e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.010205e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 +TOTAL : 0.524427 sec +INFO: No Floating Point Exceptions have been reported + 1,837,704,710 cycles:u # 3.485 GHz (74.34%) + 19,085,726 stalled-cycles-frontend:u # 1.04% frontend cycles idle (74.22%) + 161,785,061 stalled-cycles-backend:u # 8.80% backend cycles idle (74.48%) + 4,826,428,263 instructions:u # 2.63 insn per cycle + # 0.03 stalled cycles per insn (75.24%) + 0.532111325 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84347) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.836478e-03 +Avg ME (F77/C++) = 9.8364784946823516E-003 +Relative difference = 5.0290597139820844e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 
256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.682520e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.684604e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.684604e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.688940 sec -INFO: No Floating Point Exceptions have been reported - 1,884,507,725 cycles # 2.723 GHz - 4,290,819,235 instructions # 2.28 insn per cycle - 0.692749981 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81185) (512y: 44) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in 
/proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.875530e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.877565e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.877565e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.770700 sec -INFO: No Floating Point Exceptions have been reported - 1,352,613,897 cycles # 1.747 GHz - 2,162,405,721 instructions # 1.60 insn per cycle - 0.774947088 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3479) (512y: 47) (512z:79330) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982676284E-003 -Relative difference = 2.004124217057488e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 8e4037314e..fd31d9982c 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building 
in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,96 +19,35 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-15_11:50:44 +DATE: 2024-09-17_09:18:26 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.261572e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.268191e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.268191e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187093e-05 +- 9.825663e-06 ) GeV^-6 -TOTAL : 1.738260 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,820,188,860 cycles # 2.913 GHz - 12,502,480,728 instructions # 2.15 insn per cycle - 2.056507800 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.148842e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.160493e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.160493e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856440e-04 +- 8.331091e-05 ) GeV^-6 -TOTAL : 2.045649 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,753,066,902 cycles # 2.917 GHz - 14,813,097,918 instructions # 2.19 insn per cycle - 2.374262766 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849635e-03 -Avg ME (F77/GPU) = 9.8712451931260159E-003 -Relative difference = 0.0021940095370046923 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.502637e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.502889e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.502889e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.211468 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 18,143,644,137 cycles # 2.921 GHz - 53,909,939,321 instructions # 2.97 insn per cycle - 6.215559174 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.100021e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.100042e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.100042e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 +TOTAL : 4.802584 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 16,836,387,718 cycles:u # 3.504 GHz (74.91%) + 102,891,510 stalled-cycles-frontend:u # 0.61% frontend cycles idle (75.01%) + 1,786,741,578 stalled-cycles-backend:u # 10.61% backend cycles idle (75.03%) + 54,206,960,406 instructions:u # 3.22 insn per cycle + # 0.03 stalled cycles per insn (75.03%) + 4.810260994 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:33073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -116,33 +55,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087551509E-003 -Relative difference = 2.119780432912131e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855168e-03 +Avg ME (F77/C++) = 9.8551676614203575E-003 +Relative difference = 3.4355542366580335e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.339632e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.340031e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.340031e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.583158 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,644,642,721 cycles # 2.928 GHz - 13,808,855,992 instructions # 2.97 insn per cycle - 1.587116749 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.937626e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.938053e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.938053e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 +TOTAL : 1.071536 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,755,091,069 cycles:u # 3.495 GHz (74.77%) + 342,975 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.69%) + 345,549,721 stalled-cycles-backend:u # 9.20% backend cycles idle (74.69%) + 13,780,342,259 instructions:u # 3.67 insn per cycle + # 0.03 stalled cycles per insn (74.86%) + 1.079071136 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:95933) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -150,33 +92,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896367235E-003 -Relative difference = 3.1515505172940424e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855164e-03 +Avg ME (F77/C++) = 9.8551639361110794E-003 +Relative difference = 6.48278610035626e-09 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.786207e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.787843e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.787843e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.780530 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,130,238,055 cycles # 2.718 GHz - 4,838,587,482 instructions # 2.27 insn per cycle - 0.784611119 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.035090e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.035254e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.035254e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 +TOTAL : 0.512549 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,798,851,477 cycles:u # 3.489 GHz (74.84%) + 814,547 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.18%) 
+ 169,382,973 stalled-cycles-backend:u # 9.42% backend cycles idle (75.18%) + 4,813,565,737 instructions:u # 2.68 insn per cycle + # 0.04 stalled cycles per insn (75.18%) + 0.519968158 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84347) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -184,80 +129,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.836478e-03 +Avg ME (F77/C++) = 9.8364784946823516E-003 +Relative difference = 5.0290597139820844e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.698223e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.700507e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.700507e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.688171 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,885,276,472 cycles # 2.726 GHz - 4,293,094,440 instructions # 2.28 insn per cycle - 0.692122848 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81185) (512y: 44) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.810636e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.812720e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.812720e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.777981 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,355,130,660 cycles # 1.735 GHz - 2,164,600,762 instructions # 1.60 insn per cycle - 0.782043846 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3479) (512y: 47) (512z:79330) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982676284E-003 -Relative difference = 2.004124217057488e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 113bcaacf7..84d6dbe34f 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-15_11:24:20 +DATE: 2024-09-17_09:07:08 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.202287e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.203031e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.203251e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.759878 sec -INFO: No Floating Point Exceptions have been reported - 5,921,315,429 cycles # 2.907 GHz - 12,451,469,321 instructions # 2.10 insn per cycle - 2.095417433 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
-......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.113173e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.113784e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.113870e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 -TOTAL : 2.088774 sec -INFO: No Floating Point Exceptions have been reported - 6,829,574,271 cycles # 2.905 GHz - 14,898,722,914 instructions # 2.18 insn per cycle - 2.410171422 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849635e-03 -Avg ME (F77/GPU) = 9.8712451931260107E-003 -Relative difference = 0.0021940095370041636 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 
13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.526309e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.526569e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.526569e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.192756 sec -INFO: No Floating Point Exceptions have been reported - 18,135,421,902 cycles # 2.927 GHz - 53,892,650,631 instructions # 2.97 insn per cycle - 6.196840431 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.091252e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.091274e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.091274e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 +TOTAL : 4.841255 sec +INFO: No Floating Point Exceptions have been reported + 16,949,591,621 cycles:u # 3.499 GHz (74.91%) + 105,838,515 stalled-cycles-frontend:u # 0.62% frontend cycles idle (74.92%) + 1,813,583,312 stalled-cycles-backend:u # 10.70% backend cycles idle (75.00%) + 54,172,544,496 instructions:u # 3.20 insn per cycle + # 0.03 stalled cycles per insn (75.06%) + 4.848693160 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:33154) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087572898E-003 -Relative difference = 2.1198021522715588e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855168e-03 +Avg ME (F77/C++) = 9.8551676614199186E-003 +Relative difference = 3.435558690007174e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.396709e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.397124e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.397124e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.556067 sec -INFO: No Floating Point Exceptions have been reported - 4,573,398,855 cycles # 2.934 GHz - 13,800,378,388 instructions # 3.02 insn per cycle - 1.559827589 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96651) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.961056e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.961517e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.961517e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 +TOTAL : 1.066638 sec +INFO: No Floating Point Exceptions have been reported + 3,731,151,584 cycles:u # 3.489 GHz (74.62%) + 318,149 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.59%) + 360,595,962 stalled-cycles-backend:u # 9.66% backend cycles idle (74.87%) + 13,778,695,801 instructions:u # 3.69 insn per cycle + # 0.03 stalled cycles per insn (75.24%) + 1.074783050 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95973) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896065809E-003 -Relative difference = 3.151856596628469e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855164e-03 +Avg ME (F77/C++) = 9.8551639361110794E-003 +Relative difference = 6.48278610035626e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.651495e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.653049e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.653049e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.795401 sec -INFO: No Floating Point Exceptions have been reported - 2,148,860,867 cycles # 2.691 GHz - 4,840,602,339 instructions # 2.25 insn per cycle - 0.799229981 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85884) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.031957e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.032121e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.032121e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 +TOTAL : 0.513718 sec +INFO: No Floating Point Exceptions have been reported + 1,806,685,149 cycles:u # 3.497 GHz (74.32%) + 875,735 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.16%) + 150,213,316 stalled-cycles-backend:u # 8.31% backend cycles idle (75.23%) + 4,822,283,812 instructions:u # 2.67 insn per cycle + # 0.03 stalled cycles per insn (75.23%) + 0.521477439 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84309) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091923E-003 -Relative difference = 1.85880227405429e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.836478e-03 +Avg ME (F77/C++) = 9.8364784946823516E-003 +Relative difference = 5.0290597139820844e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.688407e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.690576e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.690576e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.688541 sec -INFO: No Floating Point Exceptions have been reported - 1,890,706,185 cycles # 2.733 GHz - 4,294,394,779 instructions # 2.27 insn per cycle - 0.692328039 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81725) (512y: 24) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091923E-003 -Relative difference = 1.85880227405429e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in 
/proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.826093e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.828148e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.828148e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.775828 sec -INFO: No Floating Point Exceptions have been reported - 1,357,390,482 cycles # 1.742 GHz - 2,169,212,126 instructions # 1.60 insn per cycle - 0.779795742 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4092) (512y: 32) (512z:79551) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982957326E-003 -Relative difference = 2.0044082998332894e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 2e59aa2257..75711f9d04 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-15_11:25:23 +DATE: 2024-09-17_09:07:39 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.663841e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.664390e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.664590e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.203630 sec -INFO: No Floating Point Exceptions have been reported - 7,260,397,959 cycles # 2.866 GHz - 15,031,707,879 instructions # 2.07 insn per cycle - 2.589013700 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.107763e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.108067e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.108098e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.442022 sec -INFO: No Floating Point Exceptions have been reported - 10,932,120,354 cycles # 2.895 GHz - 24,906,946,249 instructions # 2.28 insn per cycle - 3.831975982 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656498E-003 -Relative difference = 3.1385249252060663e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.516129e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.516327e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.516327e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.025685 sec -INFO: No Floating Point Exceptions have been reported - 19,256,305,943 cycles # 2.740 GHz - 54,130,622,749 instructions # 2.81 insn per cycle - 7.029878997 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32000) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.190124e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.190161e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.190161e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.438246 sec +INFO: No Floating Point Exceptions have been reported + 15,526,840,838 cycles:u # 3.497 GHz (74.98%) + 2,952,770 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.97%) + 1,504,887,667 stalled-cycles-backend:u # 9.69% backend cycles idle (74.97%) + 53,733,477,093 instructions:u # 3.46 insn per cycle + # 0.03 stalled cycles per insn (74.96%) + 4.446093513 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44590) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.524890e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.524973e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.524973e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.464849 sec -INFO: No Floating Point Exceptions have been reported - 9,453,784,509 cycles # 2.726 GHz - 26,186,103,091 instructions # 2.77 insn per cycle - 3.468732831 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96049) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.474679e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.474835e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.474835e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.135568 sec +INFO: No Floating Point Exceptions have been reported + 7,481,522,304 cycles:u # 3.499 GHz (74.95%) + 2,226,802 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.94%) + 772,729,863 stalled-cycles-backend:u # 10.33% backend cycles idle (74.94%) + 25,864,411,572 instructions:u # 3.46 insn per cycle + # 0.03 stalled cycles per insn (74.94%) + 2.142734106 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95377) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.508306e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.508754e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.508754e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.507182 sec -INFO: No Floating Point Exceptions have been reported - 4,099,795,192 cycles # 2.715 GHz - 9,249,955,249 instructions # 2.26 insn per cycle - 1.510975685 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.285089e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.285622e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.285622e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.000009 sec +INFO: No Floating Point Exceptions have been reported + 3,489,273,672 cycles:u # 3.480 GHz (74.51%) + 47,806,415 stalled-cycles-frontend:u # 1.37% frontend cycles idle (74.71%) + 340,340,818 stalled-cycles-backend:u # 9.75% backend cycles idle (75.11%) + 9,098,277,117 instructions:u # 2.61 insn per cycle + # 0.04 stalled cycles per insn (75.28%) + 1.007958406 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:82824) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.116819e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.117442e+02 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.117442e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.284411 sec -INFO: No Floating Point Exceptions have been reported - 3,509,716,252 cycles # 2.725 GHz - 8,182,475,258 instructions # 2.33 insn per cycle - 1.288638878 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80015) (512y: 79) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.462021e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.462537e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.462537e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.527279 sec -INFO: No Floating Point Exceptions have been reported - 2,661,319,941 cycles # 1.739 GHz - 4,172,569,565 instructions # 1.57 insn per cycle - 1.531717386 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2612) (512y: 93) (512z:78910) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index f2e4a2151c..fc5355db1b 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-15_11:26:49 +DATE: 2024-09-17_09:08:16 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.668216e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.668742e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.668891e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.204926 sec -INFO: No Floating Point Exceptions have been reported - 7,354,907,186 cycles # 2.903 GHz - 15,835,326,846 instructions # 2.15 insn per cycle - 2.589353613 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.111109e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.111413e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.111447e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.435805 sec -INFO: No Floating Point Exceptions have been reported - 11,002,728,447 cycles # 2.923 GHz - 25,822,053,923 instructions # 2.35 insn per cycle - 3.822280777 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656498E-003 -Relative difference = 3.1385249252060663e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.824002e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.824211e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.824211e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.748238 sec -INFO: No Floating Point Exceptions have been reported - 19,286,477,225 cycles # 2.857 GHz - 54,157,907,603 instructions # 2.81 insn per cycle - 6.752432065 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32202) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.194515e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.194553e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.194553e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.422163 sec +INFO: No Floating Point Exceptions have been reported + 15,487,855,703 cycles:u # 3.500 GHz (74.90%) + 5,300,953 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.98%) + 1,623,019,803 stalled-cycles-backend:u # 10.48% backend cycles idle (75.05%) + 53,736,438,153 instructions:u # 3.47 insn per cycle + # 0.03 stalled cycles per insn (75.05%) + 4.429837474 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44515) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow 
summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.548001e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.548086e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.548086e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.412561 sec -INFO: No Floating Point Exceptions have been reported - 9,302,368,855 cycles # 2.723 GHz - 26,085,336,117 instructions # 2.80 insn per cycle - 3.416771061 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95938) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.516087e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.516245e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.516245e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.100570 sec +INFO: No Floating Point Exceptions have been reported + 7,347,023,987 cycles:u # 3.493 GHz (74.90%) + 1,435,300 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.90%) + 816,939,030 stalled-cycles-backend:u # 11.12% backend cycles idle (74.90%) + 25,751,836,706 instructions:u # 3.51 insn per cycle + # 0.03 stalled cycles per insn (74.90%) + 2.107659262 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95039) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow 
summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.533570e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.534051e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.534051e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.496264 sec -INFO: No Floating Point Exceptions have been reported - 4,086,923,304 cycles # 2.726 GHz - 9,212,952,806 instructions # 2.25 insn per cycle - 1.500090267 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83864) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.538499e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.539051e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.539051e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.955297 sec +INFO: No Floating Point Exceptions have been reported + 3,355,645,011 cycles:u # 3.502 GHz (75.15%) + 1,161,133 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.00%) + 304,879,843 stalled-cycles-backend:u # 9.09% backend cycles idle (74.96%) + 9,038,488,166 instructions:u # 2.69 insn per cycle + # 0.03 stalled cycles per insn (74.96%) + 0.962866348 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:82125) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.068352e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.068931e+02 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.068931e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.299365 sec -INFO: No Floating Point Exceptions have been reported - 3,513,960,907 cycles # 2.698 GHz - 8,167,668,326 instructions # 2.32 insn per cycle - 1.303235401 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79421) (512y: 229) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.521239e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.521794e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.521794e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.500932 sec -INFO: No Floating Point Exceptions have been reported - 2,617,549,535 cycles # 1.740 GHz - 4,166,941,618 instructions # 1.59 insn per cycle - 1.504880250 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1876) (512y: 175) (512z:78884) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 73af5e5b3a..d8bc134c62 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,117 +1,96 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-17_09:04:45 -DATE: 2024-09-15_11:19:01 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.740481e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.765338e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.886154e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.459950 sec -INFO: No Floating Point Exceptions have been reported - 1,934,018,539 cycles # 2.861 GHz - 2,739,518,446 instructions # 1.42 insn per cycle - 0.734446139 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault + 765,289,831 cycles:u # 0.783 GHz (75.41%) + 2,386,123 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.52%) + 5,614,304 stalled-cycles-backend:u # 0.73% backend cycles idle (75.33%) + 1,234,817,074 instructions:u # 1.61 insn per cycle + # 0.00 stalled cycles per insn (75.08%) + 1.033292307 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.975676e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.474629e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.695966e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.539678 sec -INFO: No Floating Point Exceptions have been reported - 2,257,806,163 cycles # 2.877 GHz - 3,239,125,642 instructions # 1.43 insn per cycle - 0.841027050 seconds time elapsed +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault + 975,011,130 cycles:u # 2.267 GHz (75.50%) + 2,444,951 stalled-cycles-frontend:u # 0.25% frontend cycles 
idle (75.99%) + 11,480,073 stalled-cycles-backend:u # 1.18% backend cycles idle (75.99%) + 1,400,584,923 instructions:u # 1.44 insn per cycle + # 0.01 stalled cycles per insn (75.38%) + 0.483827229 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Memory access fault by GPU node-4 (Agent handle: 0x6923260) on address 0x148aab185000. Reason: Unknown. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x148d41f302e2 in ??? +#1 0x148d41f2f475 in ??? +#2 0x148d40254dbf in ??? +#3 0x148d40254d2b in ??? +#4 0x148d402563e4 in ??? +#5 0x148d37bd0d1b in ??? +#6 0x148d37bcabc8 in ??? +#7 0x148d37b7c9e6 in ??? +#8 0x148d37b486e9 in ??? +#9 0x148d4032250e in ??? +#10 0xffffffffffffffff in ??? +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = +ERROR! Fortran calculation (F77/GPU) crashed ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.056428e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.078475e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.078475e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.570596 sec -INFO: No Floating Point Exceptions have been reported - 4,620,202,435 cycles # 2.935 GHz - 13,190,173,768 instructions # 2.85 insn per cycle - 
1.574765138 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.450385e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.479153e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.479153e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.153489 sec +INFO: No Floating Point Exceptions have been reported + 4,044,738,022 cycles:u # 3.498 GHz (74.84%) + 2,922,470 stalled-cycles-frontend:u # 0.07% frontend cycles idle (75.09%) + 830,501,015 stalled-cycles-backend:u # 20.53% backend cycles idle (75.10%) + 13,147,449,578 instructions:u # 3.25 insn per cycle + # 0.06 stalled cycles per insn (75.10%) + 1.160916733 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +98,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.870844e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.942105e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.942105e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.895190 sec -INFO: No Floating Point Exceptions have been reported - 2,640,894,010 cycles # 2.940 GHz - 7,556,112,587 instructions # 2.86 insn per cycle - 0.899078617 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.540097e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.629678e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.629678e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.668727 sec +INFO: No Floating Point Exceptions have been reported + 2,339,388,961 cycles:u # 3.483 GHz (75.06%) + 2,245,463 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.00%) + 554,219,476 stalled-cycles-backend:u # 23.69% backend cycles idle (74.99%) + 7,489,152,435 instructions:u # 3.20 insn per cycle + # 0.07 stalled cycles per insn (74.99%) + 0.676035087 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3007) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +133,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.155420e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.359383e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.359383e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.539606 sec -INFO: No Floating Point Exceptions have been reported - 1,490,717,557 cycles # 2.746 GHz - 3,161,146,919 instructions # 2.12 insn per cycle - 0.543466540 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.760795e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.087556e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.087556e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.368041 sec +INFO: No Floating Point Exceptions have been reported + 1,286,017,437 cycles:u # 3.466 GHz (74.70%) + 1,971,200 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.18%) + 265,343,931 stalled-cycles-backend:u # 20.63% backend cycles idle (74.14%) + 3,082,088,970 instructions:u # 2.40 insn per cycle + # 0.09 stalled cycles per insn (74.19%) + 0.375625105 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2888) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,76 +168,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.514709e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.763624e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.763624e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 
-TOTAL : 0.485581 sec -INFO: No Floating Point Exceptions have been reported - 1,345,992,067 cycles # 2.752 GHz - 3,013,895,719 instructions # 2.24 insn per cycle - 0.489750963 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2752) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.329309e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.438411e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.438411e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.725847 sec -INFO: No Floating Point Exceptions have been reported - 1,326,647,346 cycles # 1.820 GHz - 1,963,906,161 instructions # 1.48 insn per cycle - 0.729744934 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 87049bf6bc..477e20b0a5 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,133 +1,106 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-17_09:17:16 -DATE: 2024-09-15_11:47:47 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.302816e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.642797e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.642797e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.484911 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,993,081,726 cycles # 2.857 GHz - 2,967,605,428 instructions # 1.49 insn per cycle - 0.755486323 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault + 837,267,745 cycles:u # 2.489 GHz (71.08%) + 2,659,462 stalled-cycles-frontend:u # 0.32% frontend cycles idle (76.15%) + 21,721,888 stalled-cycles-backend:u # 2.59% backend cycles idle (76.24%) + 1,236,659,602 instructions:u # 1.48 insn per cycle + # 0.02 stalled cycles per insn (76.32%) + 0.368314009 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.256871e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.326938e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.326938e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.759153 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,900,800,476 cycles # 2.878 GHz - 4,476,324,954 instructions # 1.54 insn per cycle - 1.066391830 seconds time elapsed +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault + 3,162,312,776 cycles:u # 2.918 GHz (73.52%) + 17,166,417 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.58%) + 813,908,886 stalled-cycles-backend:u # 25.74% backend cycles idle (75.99%) + 3,174,485,339 instructions:u # 1.00 insn per 
cycle + # 0.26 stalled cycles per insn (75.65%) + 1.115704853 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Memory access fault by GPU node-4 (Agent handle: 0x6923260) on address 
0x14f094d35000. Reason: Unknown. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x14f32c0502e2 in ??? +#1 0x14f32c04f475 in ??? +#2 0x14f32a374dbf in ??? +#3 0x14f32a374d2b in ??? +#4 0x14f32a3763e4 in ??? +#5 0x14f321cf0d1b in ??? +#6 0x14f321ceabc8 in ??? +#7 0x14f321c9c9e6 in ??? +#8 0x14f321c686e9 in ??? +#9 0x14f32a44250e in ??? +#10 0xffffffffffffffff in ??? +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = +ERROR! Fortran calculation (F77/GPU) crashed ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.051618e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.074674e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.074674e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.585397 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,659,791,475 cycles # 2.933 GHz - 13,199,729,048 instructions # 2.83 insn per cycle - 1.589552076 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.448986e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.477760e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.477760e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.158541 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,047,436,102 cycles:u # 3.483 GHz (74.77%) + 3,030,691 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.61%) + 816,402,733 stalled-cycles-backend:u # 20.17% backend cycles idle (74.80%) + 13,152,643,657 instructions:u # 3.25 insn per cycle + # 0.06 stalled cycles per insn (75.10%) + 1.166666605 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -135,33 +108,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.863646e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.935223e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.935223e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.905908 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,672,075,267 cycles # 2.939 GHz - 7,605,973,490 instructions # 2.85 insn per cycle - 0.909977972 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.535706e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.624712e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.624712e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.673486 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,361,808,073 cycles:u # 3.490 GHz (74.66%) + 2,515,451 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.08%) + 550,090,025 stalled-cycles-backend:u # 23.29% backend cycles idle (75.18%) + 7,509,163,852 instructions:u # 3.18 insn per cycle + # 0.07 stalled cycles per insn (75.18%) + 0.680952675 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3007) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -169,33 +145,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.113398e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.317707e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.317707e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.554094 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,524,354,103 cycles # 2.734 GHz - 3,211,905,393 instructions # 2.11 insn per cycle - 0.558166519 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.742888e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.066882e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.066882e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.372683 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,300,445,328 cycles:u # 3.459 GHz (74.65%) + 2,012,156 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.49%) + 267,226,121 stalled-cycles-backend:u # 20.55% backend cycles idle (74.48%) + 3,103,504,775 instructions:u # 2.39 insn per cycle + # 0.09 stalled cycles per insn (74.48%) + 0.380188324 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2888) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -203,80 +182,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.488860e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.737446e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.737446e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.497012 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,381,887,844 cycles # 2.761 GHz - 3,066,710,334 instructions # 2.22 insn per cycle - 0.501143809 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2752) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.170464e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.268423e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.268423e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.785429 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,369,203,746 cycles # 1.799 GHz - 2,005,266,999 instructions # 1.46 insn per cycle - 0.789533436 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index f184fc3b5e..a484bbc168 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,117 +1,96 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-17_09:04:52 -DATE: 2024-09-15_11:19:15 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.732857e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.764454e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.875095e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.459022 sec -INFO: No Floating Point Exceptions have been reported - 1,935,515,541 cycles # 2.868 GHz - 2,740,568,582 instructions # 1.42 insn per cycle - 0.732282850 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault + 744,075,935 cycles:u # 2.279 GHz (74.85%) + 2,209,449 stalled-cycles-frontend:u # 0.30% frontend cycles idle (77.83%) + 13,615,528 stalled-cycles-backend:u # 1.83% backend cycles idle (78.09%) + 1,320,674,802 instructions:u # 1.77 insn per cycle + # 0.01 stalled cycles per insn (74.69%) + 0.381986849 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.938986e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.388894e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.605968e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.543627 sec -INFO: No Floating Point Exceptions have been reported - 2,240,911,931 cycles # 2.849 GHz - 3,134,508,527 instructions # 1.40 insn per cycle - 0.843804985 seconds time elapsed +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault + 980,134,192 cycles:u # 2.284 GHz (73.50%) + 2,452,402 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.79%) + 6,620,152 stalled-cycles-backend:u # 0.68% backend cycles idle (75.21%) + 1,521,504,264 instructions:u # 1.55 insn per cycle + # 
0.00 stalled cycles per insn (75.56%) + 0.464614226 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +Memory access fault by GPU node-4 (Agent handle: 0x69231b0) on address 
0x146a934f4000. Reason: Unknown. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x146d2a7f72e2 in ??? +#1 0x146d2a7f6475 in ??? +#2 0x146d28b1cdbf in ??? +#3 0x146d28b1cd2b in ??? +#4 0x146d28b1e3e4 in ??? +#5 0x146d20498d1b in ??? +#6 0x146d20492bc8 in ??? +#7 0x146d204449e6 in ??? +#8 0x146d201e66e9 in ??? +#9 0x146d28bea50e in ??? +#10 0xffffffffffffffff in ??? +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = +ERROR! Fortran calculation (F77/GPU) crashed ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.033389e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.055879e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.055879e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.606028 sec -INFO: No Floating Point Exceptions have been reported - 4,632,143,331 cycles # 2.878 GHz - 13,180,119,009 instructions # 2.85 insn per cycle - 1.610268805 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 692) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.437636e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.465805e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.465805e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.163196 sec +INFO: No Floating Point Exceptions have been reported + 4,072,883,263 cycles:u # 3.493 GHz (74.64%) + 2,355,930 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.65%) + 754,409,505 stalled-cycles-backend:u # 18.52% backend cycles idle (74.90%) + 13,147,062,402 instructions:u # 3.23 insn per cycle + # 0.06 stalled cycles per insn (75.22%) + 1.170670702 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 720) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +98,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.829571e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.900649e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.900649e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.915734 sec -INFO: No Floating Point Exceptions have been reported - 2,643,771,941 cycles # 2.877 GHz - 7,554,150,292 instructions # 2.86 insn per cycle - 0.919868185 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.580809e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.672695e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.672695e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.658402 sec +INFO: No Floating Point Exceptions have been reported + 2,302,017,863 cycles:u # 3.480 GHz (74.62%) + 1,833,884 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.60%) + 484,320,149 stalled-cycles-backend:u # 21.04% backend cycles idle (74.61%) + 7,565,887,711 instructions:u # 3.29 insn per cycle + # 0.06 stalled cycles per insn (74.70%) + 0.666091639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3000) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +133,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.046256e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.248866e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.248866e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.558915 sec -INFO: No Floating Point Exceptions have been reported - 1,500,616,577 cycles # 2.669 GHz - 3,161,167,766 instructions # 2.11 insn per cycle - 0.563154837 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2976) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.750495e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.076489e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.076489e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.368907 sec +INFO: No Floating Point Exceptions have been reported + 1,272,039,979 cycles:u # 3.421 GHz (74.22%) + 1,866,996 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.20%) + 291,180,446 stalled-cycles-backend:u # 22.89% backend cycles idle (74.59%) + 3,080,749,092 instructions:u # 2.42 insn per cycle + # 0.09 stalled cycles per insn (75.66%) + 0.377252207 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2873) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,76 +168,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.429175e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.674618e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.674618e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 
-TOTAL : 0.498371 sec -INFO: No Floating Point Exceptions have been reported - 1,352,614,614 cycles # 2.696 GHz - 3,013,058,203 instructions # 2.23 insn per cycle - 0.502370936 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2726) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.263352e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.370712e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.370712e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.746831 sec -INFO: No Floating Point Exceptions have been reported - 1,330,812,654 cycles # 1.774 GHz - 1,962,138,478 instructions # 1.47 insn per cycle - 0.751010006 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1356) (512y: 106) (512z: 2218) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 9c9085f218..103170de36 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,117 +1,96 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-17_09:04:58 -DATE: 2024-09-15_11:19:29 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.616183e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.859144e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.009356e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.455344 sec -INFO: No Floating Point Exceptions have been reported - 1,903,769,704 cycles # 2.832 GHz - 2,695,127,426 instructions # 1.42 insn per cycle - 0.728806465 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault + 806,546,029 cycles:u # 2.481 GHz (72.64%) + 2,466,010 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.16%) + 5,096,419 stalled-cycles-backend:u # 0.63% backend cycles idle (75.74%) + 1,270,003,199 instructions:u # 1.57 insn per cycle + # 0.00 stalled cycles per insn (76.29%) + 0.364327576 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.292895e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.269503e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.615665e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 -TOTAL : 0.493521 sec -INFO: No Floating Point Exceptions have been reported - 2,090,898,444 cycles # 2.835 GHz - 2,942,471,441 instructions # 1.41 insn per cycle - 0.794240657 seconds time elapsed +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault + 929,696,813 cycles:u # 2.348 GHz (75.86%) + 2,392,351 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.86%) + 6,543,076 stalled-cycles-backend:u # 0.70% backend cycles idle (75.78%) + 1,450,864,031 instructions:u # 1.56 insn per cycle + # 
0.00 stalled cycles per insn (75.65%) + 0.433195342 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247487904286338 -Relative difference = 0.0003670698531228044 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Memory access fault by GPU node-4 (Agent handle: 0x6922280) on address 
0x14f938b6f000. Reason: Unknown. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x14fbcfc222e2 in ??? +#1 0x14fbcfc21475 in ??? +#2 0x14fbcdf47dbf in ??? +#3 0x14fbcdf47d2b in ??? +#4 0x14fbcdf493e4 in ??? +#5 0x14fbc58c3d1b in ??? +#6 0x14fbc58bdbc8 in ??? +#7 0x14fbc586f9e6 in ??? +#8 0x14fbc583b6e9 in ??? +#9 0x14fbce01550e in ??? +#10 0xffffffffffffffff in ??? +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = +ERROR! Fortran calculation (F77/GPU) crashed ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.085317e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.110333e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.110333e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.528688 sec -INFO: No Floating Point Exceptions have been reported - 4,411,922,721 cycles # 2.879 GHz - 12,951,312,387 instructions # 2.94 insn per cycle - 1.532844163 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.655944e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.695014e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.695014e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945526e+02 +- 1.186197e+02 ) GeV^-2 +TOTAL : 1.010748 sec +INFO: No Floating Point Exceptions have been reported + 3,547,667,563 cycles:u # 3.500 GHz (74.61%) + 1,729,066 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.75%) + 399,816,523 stalled-cycles-backend:u # 11.27% backend cycles idle (74.75%) + 12,899,393,000 instructions:u # 3.64 insn per cycle + # 0.03 stalled cycles per insn (74.79%) + 1.018109903 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +98,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246861273719524 -Relative difference = 8.940352641194861e-08 +Avg ME (F77/C++) = 0.14246858320096933 +Relative difference = 1.1791391693704193e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.813599e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.988360e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.988360e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.600071 sec -INFO: No Floating Point Exceptions have been reported - 1,729,759,970 cycles # 2.867 GHz - 4,541,750,353 instructions # 2.63 insn per cycle - 0.604044137 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.229685e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.497086e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.497086e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 +TOTAL : 0.408927 sec +INFO: No Floating Point Exceptions have been reported + 1,418,245,736 cycles:u # 3.443 GHz (74.82%) + 1,794,172 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.76%) + 483,575,887 stalled-cycles-backend:u # 34.10% backend cycles idle (74.76%) + 4,298,766,759 instructions:u # 3.03 insn per cycle + # 0.11 stalled cycles per insn (74.76%) + 0.416911790 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3392) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +133,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246862329122401 -Relative difference = 1.6348320966878032e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424687e-01 +Avg ME (F77/C++) = 0.14246865423667998 +Relative difference = 3.2121666037785094e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.481903e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.160699e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.160699e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.317183 sec -INFO: No Floating Point Exceptions have been reported - 858,921,512 cycles # 2.679 GHz - 1,917,766,555 instructions # 2.23 insn per cycle - 0.321171597 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.525503e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.383078e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.383078e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.947131e+02 +- 1.186881e+02 ) GeV^-2 +TOTAL : 0.239138 sec +INFO: No Floating Point Exceptions have been reported + 822,711,784 cycles:u # 3.401 GHz (73.57%) + 4,475,648 stalled-cycles-frontend:u # 0.54% frontend cycles idle (73.70%) + 225,913,723 stalled-cycles-backend:u # 27.46% backend cycles idle (75.01%) + 1,863,586,483 instructions:u # 2.27 insn per cycle + # 0.12 stalled cycles per insn (76.53%) + 0.246741624 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3488) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,76 +168,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 +Avg ME (F77/C++) = 0.14247490118064832 +Relative difference = 8.286711056488833e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.857503e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.629025e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.629025e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.298118 sec -INFO: No Floating Point Exceptions have been reported - 804,518,989 cycles # 2.670 GHz - 1,834,610,739 instructions # 2.28 insn per cycle - 0.301964643 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3402) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.365317e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.786659e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.786659e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.395762 sec -INFO: No Floating Point Exceptions have been reported - 728,663,796 cycles # 1.826 GHz - 1,308,267,192 instructions # 1.80 insn per cycle - 0.399787635 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1963) (512y: 26) (512z: 2434) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491576758442 -Relative difference = 1.1066920862943416e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index f23dffbec1..5d6b6aafb9 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,133 +1,106 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-17_09:17:23 -DATE: 2024-09-15_11:48:01 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.986387e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.435739e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.435739e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.017654e+01 +- 1.429183e+01 ) GeV^-2 -TOTAL : 0.462861 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,938,742,838 cycles # 2.865 GHz - 2,865,087,008 instructions # 1.48 insn per cycle - 0.732940290 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault + 851,027,640 cycles:u # 2.562 GHz (72.19%) + 2,738,062 stalled-cycles-frontend:u # 0.32% frontend cycles idle (75.86%) + 21,553,583 stalled-cycles-backend:u # 2.53% backend cycles idle (75.94%) + 1,262,462,005 instructions:u # 1.48 insn per cycle + # 0.02 stalled cycles per insn (76.46%) + 0.364417631 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.036027e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.082450e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.082450e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.609941e+02 +- 2.115589e+02 ) GeV^-2 -TOTAL : 0.632974 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,495,936,905 cycles # 2.880 GHz - 3,785,157,902 instructions # 1.52 insn per cycle - 0.923834641 seconds time elapsed +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault + 2,962,274,839 cycles:u # 2.921 GHz (74.51%) + 16,310,389 stalled-cycles-frontend:u # 0.55% frontend cycles idle (73.09%) + 834,349,457 stalled-cycles-backend:u # 28.17% backend cycles idle (74.26%) + 3,234,168,346 instructions:u # 1.09 insn per 
cycle + # 0.26 stalled cycles per insn (75.27%) + 1.049424411 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247487904286338 -Relative difference = 0.0003670698531228044 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Memory access fault by GPU node-4 (Agent handle: 0x6922280) on address 
0x15252d97f000. Reason: Unknown. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x1527c47332e2 in ??? +#1 0x1527c4732475 in ??? +#2 0x1527c2a58dbf in ??? +#3 0x1527c2a58d2b in ??? +#4 0x1527c2a5a3e4 in ??? +#5 0x1527ba3d4d1b in ??? +#6 0x1527ba3cebc8 in ??? +#7 0x1527ba3809e6 in ??? +#8 0x1527ba34c6e9 in ??? +#9 0x1527c2b2650e in ??? +#10 0xffffffffffffffff in ??? +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = +ERROR! Fortran calculation (F77/GPU) crashed ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.104272e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.129547e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.129547e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.505088 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,422,322,267 cycles # 2.932 GHz - 12,955,751,055 instructions # 2.93 insn per cycle - 1.509164533 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.655802e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.695047e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.695047e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945526e+02 +- 1.186197e+02 ) GeV^-2 +TOTAL : 1.013248 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,548,437,994 cycles:u # 3.492 GHz (74.88%) + 1,771,213 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.82%) + 400,864,942 stalled-cycles-backend:u # 11.30% backend cycles idle (74.81%) + 12,902,940,859 instructions:u # 3.64 insn per cycle + # 0.03 stalled cycles per insn (74.81%) + 1.021300390 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -135,33 +108,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246861273719524 -Relative difference = 8.940352641194861e-08 +Avg ME (F77/C++) = 0.14246858320096933 +Relative difference = 1.1791391693704193e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.849156e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.028095e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.028095e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.596878 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,746,639,368 cycles # 2.911 GHz - 4,590,056,426 instructions # 2.63 insn per cycle - 0.600772729 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.234861e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.501310e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.501310e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 +TOTAL : 0.411015 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,439,279,123 cycles:u # 3.477 GHz (75.19%) + 1,815,921 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.90%) + 488,545,054 stalled-cycles-backend:u # 33.94% backend cycles idle (74.89%) + 4,324,704,827 instructions:u # 3.00 insn per cycle + # 0.11 stalled cycles per insn (74.89%) + 0.418480139 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3392) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -169,33 +145,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246862329122401 -Relative difference = 1.6348320966878032e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424687e-01 +Avg ME (F77/C++) = 0.14246865423667998 +Relative difference = 3.2121666037785094e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.482456e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.156915e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.156915e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.321160 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 875,522,703 cycles # 2.698 GHz - 1,954,476,479 instructions # 2.23 insn per cycle - 0.325091323 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.908700e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.864660e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.864660e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.947131e+02 +- 1.186881e+02 ) GeV^-2 +TOTAL : 0.231399 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 800,159,625 cycles:u # 3.414 GHz (73.79%) + 1,927,070 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.48%) + 228,243,229 stalled-cycles-backend:u # 28.52% backend cycles idle (76.12%) + 1,884,780,194 instructions:u # 2.36 insn per cycle + # 0.12 stalled cycles per insn (76.12%) + 0.239026086 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 
0) (avx2: 3488) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -203,80 +182,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 +Avg ME (F77/C++) = 0.14247490118064832 +Relative difference = 8.286711056488833e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge 
OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.960780e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.751467e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.751467e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.296758 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 821,090,022 cycles # 2.738 GHz - 1,871,468,752 instructions # 2.28 insn per cycle - 0.300472695 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3402) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.492830e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.932362e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.932362e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.388889 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 746,594,357 cycles # 1.904 GHz - 1,349,630,324 instructions # 1.81 insn per cycle - 0.392744433 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1963) (512y: 26) (512z: 2434) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491576758442 -Relative difference = 1.1066920862943416e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index e2521e45b2..dd554c2ed9 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,117 +1,96 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-17_09:05:04 -DATE: 2024-09-15_11:19:41 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.639867e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.865475e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.015127e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.454979 sec -INFO: No Floating Point Exceptions have been reported - 1,901,448,560 cycles # 2.824 GHz - 2,678,183,164 instructions # 1.41 insn per cycle - 0.730072764 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault + 774,187,019 cycles:u # 2.388 GHz (76.04%) + 2,413,220 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.61%) + 11,440,480 stalled-cycles-backend:u # 1.48% backend cycles idle (76.09%) + 1,210,178,313 instructions:u # 1.56 insn per cycle + # 0.01 stalled cycles per insn (77.67%) + 0.360281997 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.246644e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.993104e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.325490e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 -TOTAL : 0.496812 sec -INFO: No Floating Point Exceptions have been reported - 2,083,867,357 cycles # 2.832 GHz - 2,934,470,565 instructions # 1.41 insn per cycle - 0.792648211 seconds time elapsed +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault + 932,202,166 cycles:u # 2.345 GHz (75.92%) + 2,432,651 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.90%) + 7,458,416 stalled-cycles-backend:u # 0.80% backend cycles idle (74.38%) + 1,482,506,978 instructions:u # 1.59 insn per cycle + # 
0.01 stalled cycles per insn (74.88%) + 0.433805412 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247487904286338 -Relative difference = 0.0003670698531228044 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Memory access fault by GPU node-4 (Agent handle: 0x69221d0) on address 
0x14bbc70ef000. Reason: Unknown. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x14be5e40f2e2 in ??? +#1 0x14be5e40e475 in ??? +#2 0x14be5c736dbf in ??? +#3 0x14be5c736d2b in ??? +#4 0x14be5c7383e4 in ??? +#5 0x14be540b2d1b in ??? +#6 0x14be540acbc8 in ??? +#7 0x14be5405e9e6 in ??? +#8 0x14be5402a6e9 in ??? +#9 0x14be5c80450e in ??? +#10 0xffffffffffffffff in ??? +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = +ERROR! Fortran calculation (F77/GPU) crashed ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.078136e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.103342e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.103342e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.538413 sec -INFO: No Floating Point Exceptions have been reported - 4,411,092,348 cycles # 2.861 GHz - 12,926,836,759 instructions # 2.93 insn per cycle - 1.542610115 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 630) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.634348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.672469e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.672469e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945526e+02 +- 1.186197e+02 ) GeV^-2 +TOTAL : 1.023951 sec +INFO: No Floating Point Exceptions have been reported + 3,587,561,145 cycles:u # 3.494 GHz (74.91%) + 1,716,315 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.08%) + 525,478,997 stalled-cycles-backend:u # 14.65% backend cycles idle (75.08%) + 12,865,660,946 instructions:u # 3.59 insn per cycle + # 0.04 stalled cycles per insn (75.08%) + 1.031366182 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 718) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +98,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246861273719524 -Relative difference = 8.940352641194861e-08 +Avg ME (F77/C++) = 0.14246858320096933 +Relative difference = 1.1791391693704193e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.816554e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.994652e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.994652e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.599654 sec -INFO: No Floating Point Exceptions have been reported - 1,728,903,265 cycles # 2.870 GHz - 4,536,279,042 instructions # 2.62 insn per cycle - 0.603646034 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3611) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.201846e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.463974e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.463974e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 +TOTAL : 0.411230 sec +INFO: No Floating Point Exceptions have been reported + 1,441,859,421 cycles:u # 3.483 GHz (75.17%) + 1,803,436 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.89%) + 498,382,432 stalled-cycles-backend:u # 34.57% backend cycles idle (74.89%) + 4,298,169,788 instructions:u # 2.98 insn per cycle + # 0.12 stalled cycles per insn (74.89%) + 0.418909966 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3379) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +133,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246862329122401 -Relative difference = 1.6348320966878032e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424687e-01 +Avg ME (F77/C++) = 0.14246865423667998 +Relative difference = 3.2121666037785094e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.299874e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.938357e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.938357e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.327791 sec -INFO: No Floating Point Exceptions have been reported - 861,849,665 cycles # 2.602 GHz - 1,914,633,101 instructions # 2.22 insn per cycle - 0.331876637 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3550) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.994732e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.969698e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.969698e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.947131e+02 +- 1.186881e+02 ) GeV^-2 +TOTAL : 0.226252 sec +INFO: No Floating Point Exceptions have been reported + 793,800,983 cycles:u # 3.465 GHz (74.01%) + 1,898,557 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.33%) + 246,057,450 stalled-cycles-backend:u # 31.00% backend cycles idle (75.57%) + 1,851,155,291 instructions:u # 2.33 insn per cycle + # 0.13 stalled cycles per insn (75.57%) + 0.233973767 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3463) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,76 +168,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 +Avg ME (F77/C++) = 0.14247490118064832 +Relative difference = 8.286711056488833e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.927651e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.715679e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.715679e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.294357 sec -INFO: No Floating Point Exceptions have been reported - 802,533,600 cycles # 2.696 GHz - 1,830,391,280 instructions # 2.28 insn per cycle - 0.298329557 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3366) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.433633e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.866083e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.866083e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.389558 sec -INFO: No Floating Point Exceptions have been reported - 729,078,705 cycles # 1.856 GHz - 1,305,984,013 instructions # 1.79 insn per cycle - 0.393475655 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1926) (512y: 26) (512z: 2437) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491576758442 -Relative difference = 1.1066920862943416e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 75ffaff930..8fc7eb3cac 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,117 +1,96 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-17_09:05:10 -DATE: 2024-09-15_11:19:53 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.751782e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.854275e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.972003e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.462457 sec -INFO: No Floating Point Exceptions have been reported - 1,917,992,386 cycles # 2.823 GHz - 2,716,857,811 instructions # 1.42 insn per cycle - 0.737791701 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault + 735,623,306 cycles:u # 2.239 GHz (75.11%) + 2,161,260 stalled-cycles-frontend:u # 0.29% frontend cycles idle (78.18%) + 5,857,261 stalled-cycles-backend:u # 0.80% backend cycles idle (78.70%) + 1,335,251,931 instructions:u # 1.82 insn per cycle + # 0.00 stalled cycles per insn (75.38%) + 0.376317506 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.933049e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.480174e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.706062e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.542910 sec -INFO: No Floating Point Exceptions have been reported - 2,260,071,894 cycles # 2.877 GHz - 3,201,078,521 instructions # 1.42 insn per cycle - 0.842582370 seconds time elapsed +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault + 1,000,994,696 cycles:u # 2.322 GHz (73.14%) + 2,515,638 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.90%) + 5,899,608 stalled-cycles-backend:u # 0.59% backend cycles idle (75.34%) + 1,495,642,750 instructions:u # 1.49 insn per cycle + # 
0.00 stalled cycles per insn (75.80%) + 0.465225739 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482577104625 -Relative difference = 5.209967070245855e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +Memory access fault by GPU node-4 (Agent handle: 0x6923260) on address 
0x153861935000. Reason: Unknown. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x153af8c3c2e2 in ??? +#1 0x153af8c3b475 in ??? +#2 0x153af6f5fdbf in ??? +#3 0x153af6f5fd2b in ??? +#4 0x153af6f613e4 in ??? +#5 0x153aee8dbd1b in ??? +#6 0x153aee8d5bc8 in ??? +#7 0x153aee8879e6 in ??? +#8 0x153aee8536e9 in ??? +#9 0x153af702d50e in ??? +#10 0xffffffffffffffff in ??? +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = +ERROR! Fortran calculation (F77/GPU) crashed ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.028418e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.050644e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.050644e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.613813 sec -INFO: No Floating Point Exceptions have been reported - 4,647,383,877 cycles # 2.879 GHz - 13,178,063,049 instructions # 2.84 insn per cycle - 1.618051260 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 681) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.464853e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.494590e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.494590e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.142225 sec +INFO: No Floating Point Exceptions have been reported + 3,979,991,372 cycles:u # 3.475 GHz (74.86%) + 2,224,529 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.86%) + 513,190,678 stalled-cycles-backend:u # 12.89% backend cycles idle (74.86%) + 13,136,401,110 instructions:u # 3.30 insn per cycle + # 0.04 stalled cycles per insn (74.90%) + 1.149929388 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 706) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +98,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.864504e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.934483e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.934483e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.898369 sec -INFO: No Floating Point Exceptions have been reported - 2,648,200,185 cycles # 2.937 GHz - 7,475,755,342 instructions # 2.82 insn per cycle - 0.902206814 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3153) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.523085e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.610528e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.610528e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.672914 sec +INFO: No Floating Point Exceptions have been reported + 2,352,367,992 cycles:u # 3.481 GHz (75.15%) + 2,123,517 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.15%) + 553,046,992 stalled-cycles-backend:u # 23.51% backend cycles idle (75.14%) + 7,445,456,101 instructions:u # 3.17 insn per cycle + # 0.07 stalled cycles per insn (75.15%) + 0.680327478 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3106) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +133,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.200611e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.408501e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.408501e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.532171 sec -INFO: No Floating Point Exceptions have been reported - 1,476,374,652 cycles # 2.757 GHz - 3,128,702,616 instructions # 2.12 insn per cycle - 0.536024340 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3131) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.840813e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.178977e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.178977e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.362116 sec +INFO: No Floating Point Exceptions have been reported + 1,267,114,391 cycles:u # 3.471 GHz (73.89%) + 1,905,595 stalled-cycles-frontend:u # 0.15% frontend cycles idle (73.50%) + 357,119,636 stalled-cycles-backend:u # 28.18% backend cycles idle (74.59%) + 3,033,671,708 instructions:u # 2.39 insn per cycle + # 0.12 stalled cycles per insn (75.91%) + 0.369300723 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3023) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,76 +168,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.587903e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.854303e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.854303e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 
-TOTAL : 0.476990 sec -INFO: No Floating Point Exceptions have been reported - 1,322,669,287 cycles # 2.754 GHz - 2,982,885,294 instructions # 2.26 insn per cycle - 0.480825528 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2893) (512y: 110) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.251912e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.353383e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.353383e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.750102 sec -INFO: No Floating Point Exceptions have been reported - 1,363,693,421 cycles # 1.811 GHz - 1,991,339,845 instructions # 1.46 insn per cycle - 0.753947194 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1679) (512y: 108) (512z: 2252) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 40582e53fc..d9f9231e44 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,117 +1,96 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-17_09:05:16 -DATE: 2024-09-15_11:20:07 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.750483e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.807638e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.927006e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.460124 sec -INFO: No Floating Point Exceptions have been reported - 1,938,834,678 cycles # 2.862 GHz - 2,712,058,421 instructions # 1.40 insn per cycle - 0.735469517 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault + 749,952,228 cycles:u # 2.279 GHz (74.79%) + 2,234,018 stalled-cycles-frontend:u # 0.30% frontend cycles idle (76.84%) + 11,724,657 stalled-cycles-backend:u # 1.56% backend cycles idle (74.75%) + 1,418,658,704 instructions:u # 1.89 insn per cycle + # 0.01 stalled cycles per insn (70.47%) + 0.368458862 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.925117e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.366077e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.584033e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.544645 sec -INFO: No Floating Point Exceptions have been reported - 2,249,855,314 cycles # 2.863 GHz - 3,222,814,057 instructions # 1.43 insn per cycle - 0.844200129 seconds time elapsed +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault + 989,017,877 cycles:u # 2.307 GHz (73.51%) + 2,484,396 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.78%) + 5,232,865 stalled-cycles-backend:u # 0.53% backend cycles idle (75.77%) + 1,489,322,805 instructions:u # 1.51 insn per cycle + # 
0.00 stalled cycles per insn (75.07%) + 0.516106931 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482577104625 -Relative difference = 5.209967070245855e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +Memory access fault by GPU node-4 (Agent handle: 0x69231b0) on address 
0x15424d2a4000. Reason: Unknown. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x1544e438e2e2 in ??? +#1 0x1544e438d475 in ??? +#2 0x1544e26b2dbf in ??? +#3 0x1544e26b2d2b in ??? +#4 0x1544e26b43e4 in ??? +#5 0x1544da02ed1b in ??? +#6 0x1544da028bc8 in ??? +#7 0x1544d9fda9e6 in ??? +#8 0x1544d9fa66e9 in ??? +#9 0x1544e278050e in ??? +#10 0xffffffffffffffff in ??? +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = +ERROR! Fortran calculation (F77/GPU) crashed ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.054095e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.076295e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.076295e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.574055 sec -INFO: No Floating Point Exceptions have been reported - 4,641,890,435 cycles # 2.943 GHz - 13,165,898,661 instructions # 2.84 insn per cycle - 1.578249512 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.460795e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.490218e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.490218e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.145702 sec +INFO: No Floating Point Exceptions have been reported + 4,008,278,939 cycles:u # 3.490 GHz (75.15%) + 2,042,355 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.95%) + 713,021,142 stalled-cycles-backend:u # 17.79% backend cycles idle (74.93%) + 13,135,943,398 instructions:u # 3.28 insn per cycle + # 0.05 stalled cycles per insn (74.93%) + 1.153070404 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 697) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +98,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.867370e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.936884e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.936884e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.896912 sec -INFO: No Floating Point Exceptions have been reported - 2,636,737,245 cycles # 2.930 GHz - 7,477,755,477 instructions # 2.84 insn per cycle - 0.900719288 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3142) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.419703e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.500272e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.500272e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.700641 sec +INFO: No Floating Point Exceptions have been reported + 2,440,009,345 cycles:u # 3.468 GHz (75.00%) + 1,871,517 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.99%) + 628,564,983 stalled-cycles-backend:u # 25.76% backend cycles idle (74.99%) + 7,437,101,160 instructions:u # 3.05 insn per cycle + # 0.08 stalled cycles per insn (74.99%) + 0.708195858 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3097) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +133,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.202775e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.410191e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.410191e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.531412 sec -INFO: No Floating Point Exceptions have been reported - 1,468,072,782 cycles # 2.747 GHz - 3,129,202,339 instructions # 2.13 insn per cycle - 0.535248151 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3109) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.814876e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.149180e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.149180e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.363790 sec +INFO: No Floating Point Exceptions have been reported + 1,273,036,982 cycles:u # 3.472 GHz (74.35%) + 2,456,139 stalled-cycles-frontend:u # 0.19% frontend cycles idle (73.87%) + 263,152,821 stalled-cycles-backend:u # 20.67% backend cycles idle (73.96%) + 3,058,369,150 instructions:u # 2.40 insn per cycle + # 0.09 stalled cycles per insn (74.83%) + 0.370999366 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3001) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,76 +168,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.576512e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.841608e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.841608e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 
-TOTAL : 0.477966 sec -INFO: No Floating Point Exceptions have been reported - 1,324,577,804 cycles # 2.753 GHz - 2,983,698,636 instructions # 2.25 insn per cycle - 0.481692847 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2869) (512y: 110) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.229034e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.331255e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.331255e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.757523 sec -INFO: No Floating Point Exceptions have been reported - 1,366,953,688 cycles # 1.797 GHz - 1,991,556,146 instructions # 1.46 insn per cycle - 0.761326972 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1655) (512y: 108) (512z: 2252) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index a10430f205..e8f148c35c 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-09-17_09:26:00 -DATE: 2024-09-15_12:19:52 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.222962e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.849418e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.427313e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.531972 sec -INFO: No Floating Point Exceptions have been reported - 2,207,295,929 cycles # 2.875 GHz - 3,148,652,719 instructions # 1.43 insn per cycle - 0.824191400 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110280 -Relative difference = 2.1036162329561614e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.605446e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.642912e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.642912e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) 
GeV^0 -TOTAL : 6.682964 sec -INFO: No Floating Point Exceptions have been reported - 19,598,347,374 cycles # 2.930 GHz - 52,065,080,941 instructions # 2.66 insn per cycle - 6.694844262 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926105795 -Relative difference = 2.1036172727915933e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.916629e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.051442e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.051442e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.747547 sec -INFO: No Floating Point Exceptions have been reported - 11,065,354,139 cycles # 2.943 GHz - 30,912,254,749 instructions # 2.79 insn per cycle - 3.760361851 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2914) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926105795 -Relative difference = 2.1036172727915933e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.668387e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.008748e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.008748e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.399916 sec -INFO: No Floating Point Exceptions have been reported - 6,627,221,489 cycles # 2.749 GHz - 13,792,796,598 instructions # 2.08 insn per cycle - 2.412653295 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2941) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.129922e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.540308e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.540308e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.198225 sec -INFO: No Floating Point Exceptions have been reported - 
6,104,431,058 cycles # 2.762 GHz - 13,134,794,290 instructions # 2.15 insn per cycle - 2.210920696 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2667) (512y: 146) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 897,787,561 cycles:u # 0.606 GHz (74.82%) + 2,322,322 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.91%) + 12,676,754 stalled-cycles-backend:u # 1.41% backend cycles idle (74.84%) + 1,488,185,060 instructions:u # 1.66 insn per cycle + # 0.01 stalled cycles per insn (75.24%) + 1.536285594 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.449384e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.629220e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.629220e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.193674 sec -INFO: No Floating Point Exceptions have been reported - 5,993,463,965 cycles # 1.870 GHz - 8,712,960,993 instructions # 1.45 insn per cycle - 3.206184057 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1506) (512y: 128) (512z: 1943) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 4.3134710926110271 + File "", line 1 + me1=; me2=4.3134710926110271; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index 01ceafd1da..960adafc8d 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-09-17_09:26:03 -DATE: 2024-09-15_12:20:19 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.181438e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.797209e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.367517e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.531302 sec -INFO: No Floating Point Exceptions have been reported - 2,216,417,295 cycles # 2.883 GHz - 3,137,968,070 instructions # 1.42 insn per cycle - 0.825226040 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110280 -Relative difference = 2.1036162329561614e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.706608e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.748828e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.748828e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.299466 sec -INFO: No Floating Point Exceptions have been reported - 18,540,883,021 cycles # 2.938 GHz 
- 50,178,474,604 instructions # 2.71 insn per cycle - 6.311951743 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 626) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926105795 -Relative difference = 2.1036172727915933e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] 
(23) = ( 3.062664e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.211274e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.211274e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.576786 sec -INFO: No Floating Point Exceptions have been reported - 10,549,321,378 cycles # 2.940 GHz - 29,289,408,214 instructions # 2.78 insn per cycle - 3.589213709 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2732) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926105795 -Relative difference = 2.1036172727915933e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.340015e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.632096e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.632096e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.568858 sec -INFO: No Floating Point Exceptions have been reported - 7,118,801,409 cycles # 2.759 GHz - 15,276,261,936 instructions # 2.15 insn per cycle - 2.581007821 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3021) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.507726e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.822175e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.822175e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.479110 sec -INFO: No Floating Point Exceptions have been reported - 
6,890,334,799 cycles # 2.767 GHz - 14,747,969,860 instructions # 2.14 insn per cycle - 2.491499387 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2617) (512y: 302) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception + 908,292,704 cycles:u # 2.400 GHz (73.50%) + 2,392,771 stalled-cycles-frontend:u # 0.26% frontend cycles idle (73.74%) + 7,118,916 stalled-cycles-backend:u # 0.78% backend cycles idle (74.74%) + 1,496,479,891 instructions:u # 1.65 insn per cycle + # 0.00 stalled cycles per insn (75.69%) + 0.415323394 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.315391e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.482899e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.482899e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.316209 sec -INFO: No Floating Point Exceptions have been reported - 6,207,380,257 cycles # 1.865 GHz - 10,464,609,822 instructions # 1.69 insn per cycle - 3.328585456 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 214) (512z: 2130) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 4.3134710926110271 + File "", line 1 + me1=; me2=4.3134710926110271; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 2ef1c54aa0..c6baeb710f 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,68 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-09-17_09:26:06 -DATE: 2024-09-15_12:20:46 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.552559e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.511007e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.603394e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.491259 sec -INFO: No Floating Point Exceptions have been reported - 2,068,896,846 cycles # 2.881 GHz - 2,979,901,367 instructions # 1.44 insn per cycle - 0.776426531 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 131 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 796,145,514 cycles:u # 2.320 GHz (74.49%) + 2,394,866 
stalled-cycles-frontend:u # 0.30% frontend cycles idle (73.45%) + 12,718,740 stalled-cycles-backend:u # 1.60% backend cycles idle (74.52%) + 1,443,982,349 instructions:u # 1.81 insn per cycle + # 0.01 stalled cycles per insn (74.44%) + 0.380527612 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,180 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313490e+00 -Avg ME (F77/GPU) = 4.3136695491848513 -Relative difference = 4.162503792787837e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.683914e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.725672e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.725672e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 6.333148 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,682,476,989 cycles # 2.947 GHz - 51,267,470,348 instructions # 2.74 insn per cycle - 6.341547157 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 625) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135738277342170 -Relative difference = 3.9935743068669333e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.015012e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.280109e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.280109e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.718050 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 
8,007,664,837 cycles # 2.940 GHz - 19,370,996,217 instructions # 2.42 insn per cycle - 2.726376718 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3542) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313572e+00 -Avg ME (F77/C++) = 4.3135722697479650 -Relative difference = 6.253470796314402e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.789023e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.799247e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.799247e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.452394 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,027,415,196 cycles # 2.759 GHz - 8,886,566,152 instructions # 2.21 insn per cycle - 1.460503609 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3715) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645242873579 -Relative difference = 1.1028294269894893e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.322323e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.475598e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.475598e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.366660 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,807,036,059 cycles # 2.770 GHz - 8,489,981,547 instructions # 2.23 insn per cycle - 1.374788749 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3543) (512y: 20) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645242873579 -Relative difference = 1.1028294269894893e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.974329e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.534282e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.534282e+05 ) sec^-1 -MeanMatrixElemValue 
= ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.862736 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,570,392,990 cycles # 1.910 GHz - 6,298,404,091 instructions # 1.76 insn per cycle - 1.870756064 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2323) (512y: 24) (512z: 2290) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313564e+00 -Avg ME (F77/C++) = 4.3135643536224961 -Relative difference = 8.197919301304478e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 
+cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 4.3135525361867622 + File "", line 1 + me1=; me2=4.3135525361867622; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index 479ebdb204..a95f0eb2dd 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -1,68 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-09-17_09:26:08 -DATE: 2024-09-15_12:21:08 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.776065e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.594605e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.702542e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.491621 sec -INFO: No Floating Point Exceptions have been reported - 2,069,264,305 cycles # 2.877 GHz - 2,928,235,838 instructions # 1.42 insn per cycle - 0.775773692 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception + 832,179,582 cycles:u # 2.431 GHz (74.40%) + 2,456,119 
stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.73%) + 12,748,511 stalled-cycles-backend:u # 1.53% backend cycles idle (75.16%) + 1,457,243,610 instructions:u # 1.75 insn per cycle + # 0.01 stalled cycles per insn (75.09%) + 0.380517703 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,184 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313490e+00 -Avg ME (F77/GPU) = 4.3136695491848513 -Relative difference = 4.162503792787837e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.731608e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.775696e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.775696e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 6.161667 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,113,353,892 cycles # 2.937 GHz - 49,656,566,510 instructions # 2.74 insn per cycle - 6.170127822 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 613) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135738277342170 -Relative difference = 3.9935743068669333e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.528214e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.868162e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.868162e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.421156 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 
7,159,836,216 cycles # 2.949 GHz - 18,538,672,579 instructions # 2.59 insn per cycle - 2.429136947 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3234) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313572e+00 -Avg ME (F77/C++) = 4.3135722697479650 -Relative difference = 6.253470796314402e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.353305e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.808520e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.808520e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 2.063917 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,718,285,058 cycles # 2.760 GHz - 10,903,070,951 instructions # 1.91 insn per cycle - 2.072527320 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4274) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645242873579 -Relative difference = 1.1028294269894893e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.452070e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.924355e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.924355e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 2.029021 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 
5,634,694,028 cycles # 2.767 GHz - 10,598,235,094 instructions # 1.88 insn per cycle - 2.037144953 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4135) (512y: 12) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645242873579 -Relative difference = 1.1028294269894893e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.351507e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.637189e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.637189e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 2.516038 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,700,188,022 cycles # 1.863 GHz - 8,712,811,590 instructions # 1.85 insn per cycle - 2.524039667 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 0) (512z: 2885) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313564e+00 -Avg ME (F77/C++) = 4.3135643536224961 -Relative difference = 8.197919301304478e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 4.3135525361867622 + File "", line 1 + me1=; me2=4.3135525361867622; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 7f1052231e..211ebcec94 100644 --- 
a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,68 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-09-17_09:26:10 -DATE: 2024-09-15_12:21:31 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = 
( 4.233444e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.828530e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.380985e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.531166 sec -INFO: No Floating Point Exceptions have been reported - 2,204,089,924 cycles # 2.871 GHz - 3,157,003,197 instructions # 1.43 insn per cycle - 0.824717149 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception + 889,655,717 cycles:u # 2.333 GHz (74.90%) + 2,361,297 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.89%) + 6,408,772 stalled-cycles-backend:u # 0.72% backend cycles idle (73.67%) + 1,471,162,588 instructions:u # 1.65 insn per cycle + # 0.00 stalled cycles per insn (75.19%) + 0.416404111 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,184 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313472e+00 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = Avg ME (F77/GPU) = 4.3134711012809239 -Relative difference = 2.0835166567625394e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-EvtsPerSec[Rmb+ME] (23) = ( 1.513438e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.546541e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.546541e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 7.083035 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 20,836,157,287 cycles # 2.937 GHz - 52,059,859,689 instructions # 2.50 insn per cycle - 7.095325403 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134711778082178 -Relative difference = 1.906102050071626e-07 -OK (relative difference <= 5E-3) 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.708543e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.825114e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.825114e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 4.025851 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 11,676,241,949 cycles # 2.892 GHz - 30,719,909,890 instructions # 2.63 insn per cycle - 4.038601753 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2971) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134711778082178 -Relative difference = 1.906102050071626e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.506319e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.824086e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.824086e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.479724 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 
6,856,020,570 cycles # 2.752 GHz - 13,733,686,621 instructions # 2.00 insn per cycle - 2.492002268 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3118) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.929662e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.305854e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.305854e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.278940 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,324,440,516 cycles # 2.761 GHz - 13,099,663,654 instructions # 2.07 insn per cycle - 2.291244442 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2851) (512y: 150) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.121063e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.267193e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.267193e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.513123 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 
6,549,229,378 cycles # 1.858 GHz - 8,826,958,587 instructions # 1.35 insn per cycle - 3.525479379 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1792) (512y: 130) (512z: 2013) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + File "", line 1 + me1=; me2=4.3134711012809239; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git 
a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index b5ff528c40..27caaa9b9d 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -1,68 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-09-17_09:26:13 -DATE: 2024-09-15_12:21:59 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.263927e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.696044e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.276288e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.532325 sec -INFO: No Floating Point Exceptions have been reported - 2,205,625,214 cycles # 2.868 GHz - 3,174,502,414 instructions # 1.44 insn per cycle - 0.825987629 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception + 903,069,189 cycles:u # 2.385 GHz (73.83%) + 2,287,403 
stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.71%) + 7,735,428 stalled-cycles-backend:u # 0.86% backend cycles idle (75.40%) + 1,453,998,206 instructions:u # 1.61 insn per cycle + # 0.01 stalled cycles per insn (75.53%) + 0.415028548 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,184 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313472e+00 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = Avg ME (F77/GPU) = 4.3134711012809239 -Relative difference = 2.0835166567625394e-07 -OK (relative difference <= 5E-3) 
-========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.606903e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.644535e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.644535e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.677680 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 19,666,494,079 cycles # 2.940 GHz - 50,081,060,677 instructions # 2.55 insn per cycle - 6.689882991 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 599) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134711778082178 -Relative difference = 1.906102050071626e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.871340e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.003903e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.003903e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.840726 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 
11,259,304,877 cycles # 2.923 GHz - 29,230,934,183 instructions # 2.60 insn per cycle - 3.852980170 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2807) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134711778082178 -Relative difference = 1.906102050071626e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.726077e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.943109e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.943109e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.970419 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 8,229,432,856 cycles # 2.759 GHz - 15,297,097,015 instructions # 1.86 insn per cycle - 2.983820409 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3202) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.908170e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.147639e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.147639e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.839910 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 
7,866,112,540 cycles # 2.759 GHz - 14,608,431,526 instructions # 1.86 insn per cycle - 2.852893659 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2774) (512y: 304) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.030076e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.169205e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.169205e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.616163 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,699,969,895 cycles # 1.847 GHz - 10,018,865,936 instructions # 1.50 insn per cycle - 3.629335211 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1565) (512y: 216) (512z: 2217) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + File "", line 1 + me1=; me2=4.3134711012809239; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 7707f676a6..8f00e506d3 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-09-17_09:25:40 -DATE: 2024-09-15_12:18:46 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.767516e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.784818e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.787795e+04 ) sec^-1 
-MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.470421 sec -INFO: No Floating Point Exceptions have been reported - 1,978,272,924 cycles # 2.864 GHz - 2,912,164,766 instructions # 1.47 insn per cycle - 0.749211691 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 954,974,630 cycles:u # 0.908 GHz (75.47%) + 2,343,671 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.33%) + 8,919,960 stalled-cycles-backend:u # 0.93% backend cycles idle (75.25%) + 1,500,525,229 instructions:u # 1.57 insn per cycle + # 0.01 stalled cycles per insn (74.16%) + 1.096050604 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.005244e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.117313e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.126114e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.483026 sec -INFO: No Floating Point Exceptions have been reported - 2,024,260,948 cycles # 2.878 GHz - 3,029,497,927 instructions # 1.50 insn per cycle - 0.762830166 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176604E-006 -Relative difference = 3.3392753366481633e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) 
= ( 3.405701e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.409074e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.409074e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.157165 sec -INFO: No Floating Point Exceptions have been reported - 467,074,127 cycles # 2.919 GHz - 1,389,682,298 instructions # 2.98 insn per cycle - 0.160520641 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3908) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 1,161,046,200 cycles:u # 2.722 GHz (75.01%) + 2,328,012 stalled-cycles-frontend:u # 0.20% frontend cycles idle (73.94%) + 11,393,051 stalled-cycles-backend:u # 0.98% backend cycles idle (73.28%) + 1,722,237,964 instructions:u # 1.48 insn per cycle + # 0.01 stalled cycles per insn (72.94%) + 0.464357458 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860167185E-006 -Relative difference = 3.339276495559746e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.459230e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.470849e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.470849e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.083614 sec -INFO: No Floating Point Exceptions have been reported - 239,038,405 cycles # 2.765 GHz - 692,921,675 instructions # 2.90 insn 
per cycle - 0.087016440 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9483) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860167168E-006 -Relative difference = 3.3392764976441195e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.419984e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.425694e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.425694e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.039288 sec -INFO: No Floating Point Exceptions have been reported - 113,366,397 cycles # 2.696 GHz - 257,996,166 instructions # 2.28 insn per cycle - 0.042698199 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8503) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.624961e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.632288e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.632288e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.034517 sec -INFO: No Floating Point Exceptions have been reported - 101,263,068 cycles # 2.711 GHz - 239,969,377 instructions # 2.37 insn per cycle - 0.037861089 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8140) (512y: 150) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.199166e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.204857e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.204857e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.046301 sec -INFO: No Floating Point Exceptions have been reported - 89,031,390 cycles # 1.806 GHz - 134,346,666 instructions # 1.51 insn per cycle - 
0.049794003 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1943) (512y: 126) (512z: 7090) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 8.1274562860176587E-006 + File "", line 1 + me1=; me2=8.1274562860176587E-006; reldif=abs((me2-me1)/me1); print('Relative 
difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index ca3a407fd8..e4bab9232e 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-09-17_09:25:44 -DATE: 2024-09-15_12:18:57 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.802842e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.821481e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.824642e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.470683 sec -INFO: No Floating Point Exceptions have been reported - 1,990,204,132 cycles # 2.870 GHz - 2,908,985,105 instructions # 1.46 insn per cycle - 0.750697991 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception + 
952,666,848 cycles:u # 2.620 GHz (75.10%) + 2,413,575 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.93%) + 5,955,743 stalled-cycles-backend:u # 0.63% backend cycles idle (75.86%) + 1,502,436,811 instructions:u # 1.58 insn per cycle + # 0.00 stalled cycles per insn (73.71%) + 0.401055061 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.083966e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.205394e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.213656e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.487582 sec -INFO: No Floating Point Exceptions have been reported - 2,025,056,560 cycles # 2.852 GHz - 2,989,421,142 instructions # 1.48 insn per cycle - 0.769117174 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176604E-006 -Relative difference = 3.3392753366481633e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) 
= ( 3.394566e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.397741e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.397741e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.157033 sec -INFO: No Floating Point Exceptions have been reported - 465,720,728 cycles # 2.911 GHz - 1,385,003,144 instructions # 2.97 insn per cycle - 0.160593741 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3796) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception + 1,161,301,632 cycles:u # 2.736 GHz (75.44%) + 2,522,146 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.76%) + 5,014,212 stalled-cycles-backend:u # 0.43% backend cycles idle (73.83%) + 1,663,926,319 instructions:u # 1.43 insn per cycle + # 0.00 stalled cycles per insn (73.66%) + 0.461818006 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860167185E-006 -Relative difference = 3.339276495559746e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.474186e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.485931e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.485931e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.082867 sec -INFO: No Floating Point Exceptions have been reported - 237,575,401 cycles # 2.770 GHz - 689,116,420 instructions # 2.90 insn 
per cycle - 0.086305788 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9528) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860167168E-006 -Relative difference = 3.3392764976441195e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.436754e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.442531e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.442531e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.038039 sec -INFO: No Floating Point Exceptions have been reported - 110,520,646 cycles # 2.700 GHz - 253,448,082 instructions # 2.29 insn per cycle - 0.041474271 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8458) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.611056e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.618327e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.618327e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.034052 sec -INFO: No Floating Point Exceptions have been reported - 98,863,837 cycles # 2.687 GHz - 235,605,174 instructions # 2.38 insn per cycle - 0.037353270 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8098) (512y: 150) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.176506e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.181658e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.181658e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.046442 sec -INFO: No Floating Point Exceptions have been reported - 86,647,290 cycles # 1.764 GHz - 129,720,267 instructions # 1.50 insn per cycle - 
0.049837932 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1899) (512y: 126) (512z: 7094) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 8.1274562860176587E-006 + File "", line 1 + me1=; me2=8.1274562860176587E-006; reldif=abs((me2-me1)/me1); print('Relative 
difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 0df257cc6a..66c4896a7e 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-09-17_09:25:47 -DATE: 2024-09-15_12:19:09 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.204232e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.214249e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.216451e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 -TOTAL : 0.474860 sec -INFO: No Floating Point Exceptions have been reported - 1,958,003,333 cycles # 2.836 GHz - 2,859,472,548 instructions # 1.46 insn per cycle - 0.747968607 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 893,896,815 
cycles:u # 2.583 GHz (74.74%) + 2,477,469 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.82%) + 5,175,634 stalled-cycles-backend:u # 0.58% backend cycles idle (75.94%) + 1,396,921,239 instructions:u # 1.56 insn per cycle + # 0.00 stalled cycles per insn (75.93%) + 0.384874288 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.933159e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.016706e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.024424e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.020494e-03 +- 4.025605e-03 ) GeV^-4 -TOTAL : 0.477251 sec -INFO: No Floating Point Exceptions have been reported - 1,992,584,596 cycles # 2.867 GHz - 2,884,692,368 instructions # 1.45 insn per cycle - 0.751767434 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127250e-06 -Avg ME (F77/GPU) = 8.1272869669930272E-006 -Relative difference = 4.548524165778887e-06 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = 
( 3.431583e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.434922e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.434922e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.155945 sec -INFO: No Floating Point Exceptions have been reported - 462,855,819 cycles # 2.915 GHz - 1,381,844,785 instructions # 2.99 insn per cycle - 0.159290331 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 1,046,849,130 cycles:u # 2.701 GHz (76.03%) + 2,443,461 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.54%) + 5,320,923 stalled-cycles-backend:u # 0.51% backend cycles idle (74.83%) + 1,524,354,123 instructions:u # 1.46 insn per cycle + # 0.00 stalled cycles per insn (75.34%) + 0.421507472 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127811e-06 -Avg ME (F77/C++) = 8.1278105271212486E-006 -Relative difference = 5.8180333155894157e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.210882e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.215211e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.215211e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.045512 sec -INFO: No Floating Point Exceptions have been reported - 131,360,157 cycles # 2.718 GHz - 372,013,509 instructions # 2.83 insn 
per cycle - 0.048801319 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:10141) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127809e-06 -Avg ME (F77/C++) = 8.1278090510674588E-006 -Relative difference = 6.2830535070193674e-09 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT 
(NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.769306e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.791871e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.791871e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.021120 sec -INFO: No Floating Point Exceptions have been reported - 64,157,831 cycles # 2.680 GHz - 142,829,765 instructions # 2.23 insn per cycle - 0.024479209 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9251) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.078002e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.105354e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.105354e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.019156 sec -INFO: No Floating Point Exceptions have been reported - 59,143,033 cycles # 2.685 GHz - 132,774,537 instructions # 2.24 insn per cycle - 0.022562262 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8963) (512y: 28) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.363857e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.386270e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.386270e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 -TOTAL : 0.024531 sec -INFO: No Floating Point Exceptions have been reported - 51,349,038 cycles # 1.867 GHz - 79,557,658 instructions # 1.55 insn per cycle - 
0.028087213 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2834) (512y: 32) (512z: 7442) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275369863475849E-006 -Relative difference = 1.6797726498700304e-09 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 8.1275160277913510E-006 + File "", line 1 + me1=; me2=8.1275160277913510E-006; reldif=abs((me2-me1)/me1); print('Relative 
difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index 1f92901611..e434d3365f 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-09-17_09:25:50 -DATE: 2024-09-15_12:19:19 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.234747e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.244150e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.246143e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 -TOTAL : 0.473072 sec -INFO: No Floating Point Exceptions have been reported - 1,983,051,703 cycles # 2.868 GHz - 2,920,120,611 instructions # 1.47 insn per cycle - 0.748230768 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception + 899,328,589 
cycles:u # 2.601 GHz (74.71%) + 2,369,783 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.93%) + 6,083,724 stalled-cycles-backend:u # 0.68% backend cycles idle (76.87%) + 1,389,875,106 instructions:u # 1.55 insn per cycle + # 0.00 stalled cycles per insn (75.80%) + 0.384557491 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.099236e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.191571e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.199199e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.020496e-03 +- 4.025606e-03 ) GeV^-4 -TOTAL : 0.474597 sec -INFO: No Floating Point Exceptions have been reported - 1,988,967,454 cycles # 2.876 GHz - 2,944,964,203 instructions # 1.48 insn per cycle - 0.748107743 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127250e-06 -Avg ME (F77/GPU) = 8.1272866419447706E-006 -Relative difference = 4.508529302013153e-06 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = 
( 3.448809e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.452114e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.452114e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.154406 sec -INFO: No Floating Point Exceptions have been reported - 460,841,033 cycles # 2.931 GHz - 1,376,637,796 instructions # 2.99 insn per cycle - 0.157690889 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2930) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception + 1,078,310,516 cycles:u # 2.799 GHz (74.46%) + 2,431,227 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.29%) + 6,358,432 stalled-cycles-backend:u # 0.59% backend cycles idle (74.29%) + 1,506,219,935 instructions:u # 1.40 insn per cycle + # 0.00 stalled cycles per insn (75.15%) + 0.421863166 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127811e-06 -Avg ME (F77/C++) = 8.1278105271212486E-006 -Relative difference = 5.8180333155894157e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.217964e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.222354e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.222354e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.044518 sec -INFO: No Floating Point Exceptions have been reported - 129,447,390 cycles # 2.729 GHz - 367,192,934 instructions # 2.84 insn 
per cycle - 0.047990838 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:10124) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127809e-06 -Avg ME (F77/C++) = 8.1278090510674588E-006 -Relative difference = 6.2830535070193674e-09 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT 
(NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.769546e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.792490e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.792490e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.020349 sec -INFO: No Floating Point Exceptions have been reported - 62,145,033 cycles # 2.684 GHz - 138,048,264 instructions # 2.22 insn per cycle - 0.023682982 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9205) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.058079e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.086570e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.086570e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.018477 sec -INFO: No Floating Point Exceptions have been reported - 56,677,502 cycles # 2.660 GHz - 127,963,925 instructions # 2.26 insn per cycle - 0.021825959 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8919) (512y: 28) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.337142e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.358958e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.358958e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 -TOTAL : 0.023953 sec -INFO: No Floating Point Exceptions have been reported - 48,824,483 cycles # 1.820 GHz - 74,785,723 instructions # 1.53 insn per cycle - 
0.027430916 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2789) (512y: 32) (512z: 7444) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275369863475849E-006 -Relative difference = 1.6797726498700304e-09 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 8.1275164883853706E-006 + File "", line 1 + me1=; me2=8.1275164883853706E-006; reldif=abs((me2-me1)/me1); print('Relative 
difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index c9ae973486..67e8719f10 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-09-17_09:25:53 -DATE: 2024-09-15_12:19:30 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.749294e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.767595e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.770609e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.467392 sec -INFO: No Floating Point Exceptions have been reported - 1,983,595,553 cycles # 2.874 GHz - 2,922,486,219 instructions # 1.47 insn per cycle - 0.746529670 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception + 956,981,603 
cycles:u # 2.620 GHz (75.83%) + 2,517,542 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.97%) + 5,371,065 stalled-cycles-backend:u # 0.56% backend cycles idle (76.05%) + 1,451,394,083 instructions:u # 1.52 insn per cycle + # 0.00 stalled cycles per insn (75.46%) + 0.401330468 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.927630e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.040034e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.047831e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.485867 sec -INFO: No Floating Point Exceptions have been reported - 2,031,462,606 cycles # 2.875 GHz - 3,037,983,552 instructions # 1.50 insn per cycle - 0.765937206 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405200E-006 -Relative difference = 3.3369094561706885e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = 
( 3.382949e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.386200e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.386200e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.158173 sec -INFO: No Floating Point Exceptions have been reported - 471,387,733 cycles # 2.929 GHz - 1,398,281,899 instructions # 2.97 insn per cycle - 0.161473463 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3899) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception + 1,169,981,271 cycles:u # 2.750 GHz (75.55%) + 2,593,245 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.67%) + 6,057,770 stalled-cycles-backend:u # 0.52% backend cycles idle (75.69%) + 1,678,272,959 instructions:u # 1.43 insn per cycle + # 0.00 stalled cycles per insn (74.88%) + 0.461591805 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562948736117E-006 -Relative difference = 3.32837900190667e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.673807e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.686050e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.686050e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.080997 sec -INFO: No Floating Point Exceptions have been reported - 235,160,008 cycles # 2.808 GHz - 688,033,850 instructions # 2.93 insn 
per cycle - 0.084339129 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9328) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563175290919E-006 -Relative difference = 3.3005037703909805e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED 
(NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.415459e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.422136e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.422136e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.039362 sec -INFO: No Floating Point Exceptions have been reported - 112,339,380 cycles # 2.665 GHz - 253,052,093 instructions # 2.25 insn per cycle - 0.042695307 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8363) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.648852e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.656658e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.656658e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.034079 sec -INFO: No Floating Point Exceptions have been reported - 100,217,114 cycles # 2.715 GHz - 233,607,212 instructions # 2.33 insn per cycle - 0.037476380 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7501) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.192314e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.197366e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.197366e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.046499 sec -INFO: No Floating Point Exceptions have been reported - 89,493,670 cycles # 1.812 GHz - 133,128,515 instructions # 1.49 insn per cycle - 
0.049962595 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2084) (512y: 122) (512z: 6356) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 8.1274562879405183E-006 + File "", line 1 + me1=; me2=8.1274562879405183E-006; reldif=abs((me2-me1)/me1); print('Relative 
difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index 1d81f994cb..98d29a7943 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,117 +1,51 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-09-17_09:25:57 -DATE: 2024-09-15_12:19:41 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.765961e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.783708e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.789350e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.474700 sec -INFO: No Floating Point Exceptions have been reported - 1,974,719,372 cycles # 2.833 GHz - 2,899,642,626 instructions # 1.47 insn per cycle - 0.754986373 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception + 944,021,735 
cycles:u # 2.642 GHz (75.58%) + 2,477,735 stalled-cycles-frontend:u # 0.26% frontend cycles idle (72.94%) + 6,413,288 stalled-cycles-backend:u # 0.68% backend cycles idle (71.73%) + 1,495,341,386 instructions:u # 1.58 insn per cycle + # 0.00 stalled cycles per insn (74.19%) + 0.395987038 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.058757e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.171730e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.179415e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.484755 sec -INFO: No Floating Point Exceptions have been reported - 2,032,400,386 cycles # 2.878 GHz - 3,034,442,470 instructions # 1.49 insn per cycle - 0.765490241 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405200E-006 -Relative difference = 3.3369094561706885e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = 
( 3.419840e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.423095e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.423095e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.155760 sec -INFO: No Floating Point Exceptions have been reported - 467,249,665 cycles # 2.946 GHz - 1,393,566,061 instructions # 2.98 insn per cycle - 0.159156822 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3800) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception + 1,167,852,448 cycles:u # 2.766 GHz (75.01%) + 2,310,511 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.65%) + 5,844,521 stalled-cycles-backend:u # 0.50% backend cycles idle (75.26%) + 1,703,248,077 instructions:u # 1.46 insn per cycle + # 0.00 stalled cycles per insn (73.13%) + 0.458560013 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,140 +53,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562948736117E-006 -Relative difference = 3.32837900190667e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.647634e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.659890e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.659890e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.080628 sec -INFO: No Floating Point Exceptions have been reported - 234,377,416 cycles # 2.808 GHz - 684,139,763 instructions # 2.92 insn 
per cycle - 0.083918243 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9361) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563175290919E-006 -Relative difference = 3.3005037703909805e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED 
(NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.444361e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.450464e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.450464e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037847 sec -INFO: No Floating Point Exceptions have been reported - 110,057,998 cycles # 2.704 GHz - 248,602,467 instructions # 2.26 insn per cycle - 0.041225455 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8316) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.658647e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.666343e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.666343e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.033122 sec -INFO: No Floating Point Exceptions have been reported - 97,824,445 cycles # 2.731 GHz - 229,151,030 instructions # 2.34 insn per cycle - 0.036353420 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7452) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.188861e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.193842e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.193842e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.046038 sec -INFO: No Floating Point Exceptions have been reported - 87,629,988 cycles # 1.795 GHz - 128,556,729 instructions # 1.47 insn per cycle - 
0.049600721 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2035) (512y: 122) (512z: 6356) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 8.1274562879405183E-006 + File "", line 1 + me1=; me2=8.1274562879405183E-006; reldif=abs((me2-me1)/me1); print('Relative 
difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 808bf6828b..581385cc9d 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,68 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-09-17_09:25:25 -DATE: 2024-09-15_12:17:33 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.071674e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.333003e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.756234e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.519163 sec -INFO: No Floating Point Exceptions have been reported - 2,179,162,165 cycles # 2.882 GHz - 3,070,881,799 instructions # 1.41 insn per cycle - 0.812256060 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 776,488,159 cycles:u # 0.534 
GHz (74.80%) + 2,516,553 stalled-cycles-frontend:u # 0.32% frontend cycles idle (74.95%) + 8,869,181 stalled-cycles-backend:u # 1.14% backend cycles idle (75.12%) + 1,356,671,347 instructions:u # 1.75 insn per cycle + # 0.01 stalled cycles per insn (75.14%) + 1.507943316 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,174 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.477196e-01 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = Avg ME (F77/GPU) = 0.14771956172964262 -Relative difference = 2.590743366698123e-07 -OK (relative difference <= 5E-3) 
-========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.736392e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.961242e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.961242e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.358463 sec -INFO: No Floating Point Exceptions have been reported - 3,905,907,731 cycles # 2.851 GHz - 9,863,781,254 instructions # 2.53 insn per cycle - 1.371009162 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.459599e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.873378e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.873378e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.877453 sec -INFO: No Floating Point Exceptions have been reported - 2,486,018,663 cycles # 2.796 GHz - 6,068,811,134 instructions # 2.44 insn per cycle - 0.890013058 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1369) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.202336e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.241982e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.241982e+06 ) 
sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.633614 sec -INFO: No Floating Point Exceptions have been reported - 1,818,277,006 cycles # 2.816 GHz - 3,450,832,845 instructions # 1.90 insn per cycle - 0.646259584 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1499) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
-Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.283565e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.391222e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.391222e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.617501 sec -INFO: No Floating Point Exceptions have been reported - 1,780,688,704 cycles # 2.829 GHz - 3,420,263,634 instructions # 1.92 insn per cycle - 0.630172459 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1375) (512y: 96) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.121818e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.051040e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.051040e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.653754 sec -INFO: No Floating Point Exceptions have been reported - 1,527,075,900 cycles # 2.294 GHz - 2,560,289,188 instructions # 1.68 insn per cycle - 0.666212420 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 580) (512y: 60) (512z: 1021) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + File "", line 1 + me1=; me2=0.14771956172964262; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index 06cbb3e926..e79f7c2a1d 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,68 +1,41 @@ -Building in 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-09-17_09:25:29 -DATE: 2024-09-15_12:17:46 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.969739e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.449933e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.971179e+08 ) sec^-1 -MeanMatrixElemValue = ( 
1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.523956 sec -INFO: No Floating Point Exceptions have been reported - 2,197,273,631 cycles # 2.867 GHz - 3,116,260,127 instructions # 1.42 insn per cycle - 0.822598423 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception + 776,650,730 cycles:u # 2.240 GHz (74.89%) + 2,405,347 stalled-cycles-frontend:u # 0.31% frontend cycles idle (74.86%) + 5,861,901 stalled-cycles-backend:u # 0.75% backend cycles idle (74.11%) + 1,353,641,108 instructions:u # 1.74 insn per cycle + # 0.00 stalled cycles per insn (75.74%) + 0.383423982 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,174 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.477196e-01 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = Avg ME (F77/GPU) = 0.14771956172964262 -Relative difference = 2.590743366698123e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = 
SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.011162e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.031790e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.031790e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.321281 sec -INFO: No Floating Point Exceptions have been reported - 3,893,594,822 cycles # 2.920 GHz - 9,744,555,445 instructions # 2.50 insn per cycle - 1.334263922 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.381914e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.838565e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.838565e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.917902 sec -INFO: No Floating Point Exceptions have been reported - 2,659,815,270 cycles # 2.862 GHz - 6,026,660,919 instructions # 2.27 insn per cycle - 0.930359460 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1335) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.192597e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.249251e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.249251e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.636612 sec -INFO: No Floating Point Exceptions have been reported - 1,830,188,885 cycles # 2.821 GHz - 3,421,758,036 instructions # 1.87 insn per cycle - 0.649438298 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1436) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.271531e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.393290e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.393290e+06 ) 
sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.619901 sec -INFO: No Floating Point Exceptions have been reported - 1,782,873,131 cycles # 2.820 GHz - 3,395,941,059 instructions # 1.90 insn per cycle - 0.632832806 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1328) (512y: 96) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
-Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.134142e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.070820e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.070820e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.650853 sec -INFO: No Floating Point Exceptions have been reported - 1,547,796,135 cycles # 2.334 GHz - 2,545,431,106 instructions # 1.64 insn per cycle - 0.664117617 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 547) (512y: 60) (512z: 1007) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + File "", line 1 + me1=; me2=0.14771956172964262; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 6bbc9fb0da..aad2582b55 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-09-17_09:25:31 -DATE: 2024-09-15_12:17:58 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.000684e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.992629e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.388281e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 -TOTAL : 0.488697 sec -INFO: No Floating Point Exceptions have been reported - 2,047,674,909 cycles # 2.864 GHz - 2,921,802,724 instructions # 1.43 insn per cycle - 0.773667864 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 97 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.477195e-01 -Avg ME (F77/GPU) = 0.14771956735057756 -Relative difference = 4.559355911674916e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal 
loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.029870e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.042886e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.042886e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 1.272870 sec -INFO: No Floating Point Exceptions have been reported - 3,752,313,957 cycles # 2.930 GHz - 9,659,106,684 instructions # 2.57 insn per cycle - 1.281538641 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956094773486 -Relative difference = 2.643675256627469e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= 
-INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.197021e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.333033e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.333033e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 0.588107 sec -INFO: No Floating Point Exceptions have been reported - 1,715,029,446 cycles # 2.877 GHz - 4,025,277,973 instructions # 2.35 insn per cycle - 0.596790312 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1579) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955861942843 -Relative difference = 2.80129187869649e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.961392e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.263984e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.263984e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.463961 sec -INFO: No Floating Point Exceptions have been reported - 1,335,854,072 cycles # 2.831 GHz - 2,555,445,671 instructions # 1.91 insn per cycle - 0.472547002 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1924) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955698961392 -Relative difference = 2.9116235141448046e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) 
-Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.064902e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.593039e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.593039e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.451960 sec -INFO: No Floating Point Exceptions have been reported - 1,308,601,918 cycles # 2.845 GHz - 2,529,434,362 instructions # 1.93 insn per cycle - 0.460618771 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1867) (512y: 1) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 744,091,384 cycles:u # 2.269 GHz (75.70%) + 2,340,999 stalled-cycles-frontend:u # 0.31% frontend cycles idle (74.58%) + 6,500,181 stalled-cycles-backend:u # 0.87% backend cycles idle (75.05%) + 1,351,248,193 instructions:u # 1.82 insn per cycle + # 0.00 stalled cycles per insn (75.50%) + 0.383825815 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955698961392 -Relative difference = 2.9116235141448046e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.884808e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.904943e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.904943e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 -TOTAL : 0.475883 sec -INFO: No Floating Point Exceptions have been reported - 1,154,211,341 cycles # 2.384 GHz - 2,131,381,757 instructions # 1.85 insn per cycle - 0.484642507 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1011) (512y: 5) (512z: 
1292) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955262403935 -Relative difference = 3.207154680524219e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 0.14771957969060168 + File "", line 1 + me1=; me2=0.14771957969060168; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git 
a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index 5f533fb3cd..d77f223d79 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-09-17_09:25:33 -DATE: 2024-09-15_12:18:10 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.019765e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.955728e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.339790e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 -TOTAL : 0.485800 sec -INFO: No Floating Point Exceptions have been reported - 2,046,836,184 cycles # 2.868 GHz - 2,861,763,521 instructions # 1.40 insn per cycle - 0.770456449 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 86 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.477195e-01 -Avg ME (F77/GPU) = 0.14771956525510177 -Relative difference = 4.4175008557828484e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.176079e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.058039e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.058039e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 1.251903 sec -INFO: No Floating Point Exceptions have been reported - 
3,703,836,740 cycles # 2.940 GHz - 9,528,821,992 instructions # 2.57 insn per cycle - 1.260572218 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 367) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956094773486 -Relative difference = 2.643675256627469e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] 
('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.192175e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.322296e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.322296e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 0.587889 sec -INFO: No Floating Point Exceptions have been reported - 1,712,573,858 cycles # 2.874 GHz - 3,991,164,090 instructions # 2.33 insn per cycle - 0.596469979 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1517) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955861942843 -Relative difference = 2.80129187869649e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.984131e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.291891e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.291891e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.460621 sec -INFO: No Floating Point Exceptions have been reported - 1,332,768,943 cycles # 2.844 GHz - 2,539,760,549 instructions # 1.91 insn per cycle - 0.469223881 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1815) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955698961392 -Relative difference = 2.9116235141448046e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.068338e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.608348e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.608348e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.451660 sec -INFO: No Floating Point Exceptions 
have been reported - 1,303,705,490 cycles # 2.835 GHz - 2,516,660,988 instructions # 1.93 insn per cycle - 0.460426647 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1776) (512y: 1) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception + 766,680,046 cycles:u # 2.357 GHz (75.66%) + 2,418,794 stalled-cycles-frontend:u # 0.32% frontend cycles idle (75.35%) + 7,397,866 stalled-cycles-backend:u # 0.96% backend cycles idle (75.27%) + 1,393,110,146 instructions:u # 1.82 insn per cycle + # 0.01 stalled cycles per insn (73.90%) + 0.363198062 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955698961392 -Relative difference = 2.9116235141448046e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.904674e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.952335e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.952335e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 -TOTAL : 0.470523 sec -INFO: No Floating Point Exceptions have been reported - 1,148,816,748 cycles # 2.401 GHz - 2,115,600,264 instructions # 1.84 insn per cycle - 0.478989217 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 906) (512y: 5) (512z: 
1273) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955262403935 -Relative difference = 3.207154680524219e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 0.14771957969060168 + File "", line 1 + me1=; me2=0.14771957969060168; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git 
a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 45ada3a90e..a33a4d4a04 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,68 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-09-17_09:25:35 -DATE: 2024-09-15_12:18:21 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.081665e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.353918e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.800720e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.522074 sec -INFO: No Floating Point Exceptions have been reported - 2,188,417,927 cycles # 2.886 GHz - 3,109,980,535 instructions # 1.42 insn per cycle - 0.814803667 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception + 766,231,314 cycles:u # 2.212 GHz 
(74.63%) + 2,355,246 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.38%) + 6,156,803 stalled-cycles-backend:u # 0.80% backend cycles idle (75.25%) + 1,368,516,818 instructions:u # 1.79 insn per cycle + # 0.00 stalled cycles per insn (75.59%) + 0.387224131 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,174 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.477196e-01 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = Avg ME (F77/GPU) = 0.14771956187351573 -Relative difference = 2.5810037581511336e-07 -OK (relative difference <= 5E-3) 
-========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.910495e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.017920e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.017920e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.334640 sec -INFO: No Floating Point Exceptions have been reported - 3,942,717,867 cycles # 2.929 GHz - 9,888,397,619 instructions # 2.51 insn per cycle - 1.346816311 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956645541506 -Relative difference = 2.270828308707201e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.550381e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.026476e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.026476e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.835395 sec -INFO: No Floating Point Exceptions have been reported - 2,474,407,927 cycles # 2.921 GHz - 6,051,781,084 instructions # 2.45 insn per cycle - 0.847852996 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1410) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956645541506 -Relative difference = 2.270828308707201e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.251869e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.352067e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.352067e+06 ) 
sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.625126 sec -INFO: No Floating Point Exceptions have been reported - 1,795,351,792 cycles # 2.819 GHz - 3,389,782,871 instructions # 1.89 insn per cycle - 0.637929251 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1567) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
-Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.324637e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.489814e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.489814e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.609151 sec -INFO: No Floating Point Exceptions have been reported - 1,759,711,411 cycles # 2.834 GHz - 3,345,109,138 instructions # 1.90 insn per cycle - 0.621588850 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1446) (512y: 101) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.155541e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.119089e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.119089e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.645680 sec -INFO: No Floating Point Exceptions have been reported - 1,520,276,942 cycles # 2.311 GHz - 2,512,095,426 instructions # 1.65 insn per cycle - 0.658351218 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 768) (512y: 64) (512z: 1063) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + File "", line 1 + me1=; me2=0.14771956187351573; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index 635fef145f..534aea9afc 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,68 +1,41 @@ -Building in 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-09-17_09:25:38 -DATE: 2024-09-15_12:18:34 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.163952e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.460728e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.975996e+08 ) sec^-1 -MeanMatrixElemValue = ( 
1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.521235 sec -INFO: No Floating Point Exceptions have been reported - 2,178,235,183 cycles # 2.877 GHz - 3,088,126,574 instructions # 1.42 insn per cycle - 0.814490194 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception + 791,762,847 cycles:u # 2.283 GHz (74.66%) + 2,396,414 stalled-cycles-frontend:u # 0.30% frontend cycles idle (76.03%) + 5,863,482 stalled-cycles-backend:u # 0.74% backend cycles idle (75.26%) + 1,295,975,849 instructions:u # 1.64 insn per cycle + # 0.00 stalled cycles per insn (74.34%) + 0.383224261 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,174 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.477196e-01 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = Avg ME (F77/GPU) = 0.14771956187351573 -Relative difference = 2.5810037581511336e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = 
SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.905010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.017534e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.017534e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.334358 sec -INFO: No Floating Point Exceptions have been reported - 3,930,631,045 cycles # 2.921 GHz - 9,778,615,750 instructions # 2.49 insn per cycle - 1.346795690 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956645541506 -Relative difference = 2.270828308707201e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.520527e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.978156e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.978156e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.848228 sec -INFO: No Floating Point Exceptions have been reported - 2,460,773,168 cycles # 2.862 GHz - 5,993,984,003 instructions # 2.44 insn per cycle - 0.860657174 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1368) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956645541506 -Relative difference = 2.270828308707201e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.233001e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.325225e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.325225e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.627322 sec -INFO: No Floating Point Exceptions have been reported - 1,810,072,132 cycles # 2.832 GHz - 3,352,499,816 instructions # 1.85 insn per cycle - 0.639697989 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1483) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.331445e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.508166e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.508166e+06 ) 
sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.606762 sec -INFO: No Floating Point Exceptions have been reported - 1,747,202,335 cycles # 2.825 GHz - 3,316,993,487 instructions # 1.90 insn per cycle - 0.619170203 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1382) (512y: 101) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
-Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.144157e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.097547e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.097547e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.647875 sec -INFO: No Floating Point Exceptions have been reported - 1,527,422,709 cycles # 2.315 GHz - 2,496,191,682 instructions # 1.63 insn per cycle - 0.660479795 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 716) (512y: 64) (512z: 1054) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + File "", line 1 + me1=; me2=0.14771956187351573; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 0f0996a4b7..67f6c64343 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:25:10 -DATE: 2024-09-15_12:15:09 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.705596e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.093007e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.806904e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.547454 sec -INFO: No Floating Point Exceptions have been reported - 2,244,643,125 cycles # 2.856 GHz - 3,194,753,552 instructions # 1.42 insn per cycle - 0.844381889 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.015836e+00 -Avg ME (F77/GPU) = 2.0158358666195562 -Relative difference = 6.616631711254798e-08 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.817695e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 1.865026e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.865026e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.917395 sec -INFO: No Floating Point Exceptions have been reported - 17,420,747,172 cycles # 2.939 GHz - 46,039,408,535 instructions # 2.64 insn per cycle - 5.929443281 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194407 -Relative difference = 6.616637439061751e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.177458e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.337417e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.337417e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.449474 sec -INFO: No Floating Point Exceptions have been reported - 10,167,811,545 cycles # 2.940 GHz - 27,922,488,818 instructions # 2.75 insn per cycle - 3.461267593 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2533) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194411 -Relative difference = 6.616637417031725e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.001375e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.394642e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.394642e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.246102 sec -INFO: No Floating Point Exceptions have been reported - 6,225,214,133 cycles # 2.758 GHz - 12,703,481,596 instructions # 2.04 insn per cycle - 2.257992148 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2620) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] 
('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.452222e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.920256e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.920256e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.070361 sec -INFO: No Floating Point Exceptions have been reported - 5,740,692,800 cycles # 2.758 GHz - 12,120,362,498 instructions # 2.11 insn per cycle - 2.082196362 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2363) (512y: 144) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception + 908,994,128 cycles:u # 0.614 GHz (75.10%) + 2,398,681 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.76%) + 6,897,989 stalled-cycles-backend:u # 0.76% backend cycles idle (74.79%) + 1,416,173,298 instructions:u # 1.56 insn per cycle + # 0.00 stalled cycles per insn (74.78%) + 1.536092456 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.496236e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.681187e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.681187e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.148263 sec -INFO: No Floating Point Exceptions have been reported - 5,893,231,770 cycles # 1.865 GHz - 8,460,083,225 instructions # 1.44 insn per cycle - 3.160116132 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1468) (512y: 122) (512z: 1806) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0158358666195553 + File "", line 1 + me1=; me2=2.0158358666195553; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index b863aa4b8d..5d3fad6a9a 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:25:13 -DATE: 2024-09-15_12:15:34 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.254248e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.331434e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.002046e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.532593 sec -INFO: No Floating Point Exceptions have been reported - 2,206,521,572 cycles # 2.874 GHz - 3,181,038,873 instructions # 1.44 insn per cycle - 0.824867346 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.015836e+00 -Avg ME (F77/GPU) = 2.0158358666195562 -Relative difference = 6.616631711254798e-08 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.847951e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.897550e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.897550e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.824198 sec -INFO: No Floating Point Exceptions have been reported - 17,074,611,956 cycles # 2.927 GHz - 
45,037,522,622 instructions # 2.64 insn per cycle - 5.835488505 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 566) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194411 -Relative difference = 6.616637417031725e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = 
( 3.339846e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.517622e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.517622e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.286951 sec -INFO: No Floating Point Exceptions have been reported - 9,688,702,526 cycles # 2.938 GHz - 26,805,473,197 instructions # 2.77 insn per cycle - 3.298888236 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2327) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194411 -Relative difference = 6.616637417031725e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.556976e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.882070e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.882070e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.448521 sec -INFO: No Floating Point Exceptions have been reported - 6,771,268,311 cycles # 2.753 GHz - 14,227,806,494 instructions # 2.10 insn per cycle - 2.460277833 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2704) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.776530e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.130549e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.130549e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.341767 sec -INFO: No Floating Point Exceptions have been reported - 
6,488,711,878 cycles # 2.758 GHz - 13,822,301,429 instructions # 2.13 insn per cycle - 2.353629315 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2355) (512y: 297) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception + 848,357,308 cycles:u # 2.281 GHz (74.92%) + 2,367,674 stalled-cycles-frontend:u # 0.28% frontend cycles idle (76.36%) + 5,381,570 stalled-cycles-backend:u # 0.63% backend cycles idle (74.92%) + 1,420,128,933 instructions:u # 1.67 insn per cycle + # 0.00 stalled cycles per insn (74.06%) + 0.407523134 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.365014e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.535721e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.535721e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.263382 sec -INFO: No Floating Point Exceptions have been reported - 6,085,804,948 cycles # 1.859 GHz - 10,219,161,569 instructions # 1.68 insn per cycle - 3.275179492 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1318) (512y: 208) (512z: 1986) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0158358666195553 + File "", line 1 + me1=; me2=2.0158358666195553; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index cf83c07d47..77bc8d4d9f 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:25:16 -DATE: 2024-09-15_12:16:00 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.223710e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.732879e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.860967e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 -TOTAL : 0.489964 sec -INFO: No Floating Point Exceptions have been reported - 2,041,886,975 cycles # 2.853 GHz - 2,932,689,889 instructions # 1.44 insn per cycle - 0.773926194 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.015841e+00 -Avg ME (F77/GPU) = 2.0158787037944421 -Relative difference = 1.870375413642407e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.929290e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.984521e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.984521e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 -TOTAL : 5.538462 sec -INFO: No Floating Point Exceptions have been reported - 16,282,391,613 cycles # 2.936 GHz - 
45,369,954,990 instructions # 2.79 insn per cycle - 5.546087919 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158491701586172 -Relative difference = 8.441039850630506e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 
4.517678e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.857555e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.857555e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 -TOTAL : 2.425578 sec -INFO: No Floating Point Exceptions have been reported - 7,146,342,805 cycles # 2.938 GHz - 17,820,817,556 instructions # 2.49 insn per cycle - 2.433499088 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3136) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158486895961687 -Relative difference = 1.539816876576819e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.300139e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.447005e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.447005e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.366410 sec -INFO: No Floating Point Exceptions have been reported - 3,812,530,133 cycles # 2.776 GHz - 8,314,531,864 instructions # 2.18 insn per cycle - 1.374237525 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3369) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015847e+00 -Avg ME (F77/C++) = 2.0158474864438176 -Relative difference = 2.4130988992271984e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.645853e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.897799e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.897799e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.316285 sec -INFO: No Floating Point Exceptions have been reported - 
3,675,425,998 cycles # 2.778 GHz - 7,974,219,247 instructions # 2.17 insn per cycle - 1.323972787 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3213) (512y: 20) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception + 777,343,540 cycles:u # 2.344 GHz (76.08%) + 2,487,941 stalled-cycles-frontend:u # 0.32% frontend cycles idle (75.30%) + 6,500,963 stalled-cycles-backend:u # 0.84% backend cycles idle (73.71%) + 1,352,373,320 instructions:u # 1.74 insn per cycle + # 0.00 stalled cycles per insn (74.36%) + 0.368096392 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015847e+00 -Avg ME (F77/C++) = 2.0158474864438176 -Relative difference = 2.4130988992271984e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.513578e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.178216e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.178216e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.714470 sec -INFO: No Floating Point Exceptions have been reported - 3,315,579,741 cycles # 1.925 GHz - 6,150,343,295 instructions # 1.85 insn per cycle - 1.722857238 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2256) (512y: 24) (512z: 2156) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015848e+00 -Avg ME (F77/C++) = 2.0158476348733529 -Relative difference = 1.8112806478434436e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0158466693246737 + File "", line 1 + me1=; me2=2.0158466693246737; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index 60f2dad34a..269efb388b 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,198 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:25:18 -DATE: 2024-09-15_12:16:21 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.969923e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.737348e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.863422e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 -TOTAL : 0.488943 sec -INFO: No Floating Point Exceptions have been reported - 2,058,928,554 cycles # 2.875 GHz - 2,909,617,560 instructions # 1.41 insn per cycle - 0.773317754 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.015841e+00 -Avg ME (F77/GPU) = 2.0158787037944421 -Relative difference = 1.870375413642407e-05 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.962212e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.019252e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.019252e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 -TOTAL : 5.446348 sec -INFO: No Floating Point Exceptions have been reported - 16,014,544,982 cycles # 2.937 GHz - 
44,474,347,041 instructions # 2.78 insn per cycle - 5.454124254 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 533) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158491701586172 -Relative difference = 8.441039850630506e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 
5.286669e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.759924e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.759924e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 -TOTAL : 2.086643 sec -INFO: No Floating Point Exceptions have been reported - 6,135,728,749 cycles # 2.931 GHz - 17,120,648,230 instructions # 2.79 insn per cycle - 2.094524948 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2863) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158486895961687 -Relative difference = 1.539816876576819e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.052770e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.643067e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.643067e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.835441 sec -INFO: No Floating Point Exceptions have been reported - 5,101,873,696 cycles # 2.769 GHz - 10,273,156,361 instructions # 2.01 insn per cycle - 1.843297684 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3906) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015847e+00 -Avg ME (F77/C++) = 2.0158474864438176 -Relative difference = 2.4130988992271984e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.133897e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.741009e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.741009e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.812216 sec -INFO: No Floating Point Exceptions have been reported - 
5,041,846,676 cycles # 2.771 GHz - 10,042,915,318 instructions # 1.99 insn per cycle - 1.820042823 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3805) (512y: 2) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception + 778,744,521 cycles:u # 2.361 GHz (76.78%) + 2,450,015 stalled-cycles-frontend:u # 0.31% frontend cycles idle (74.23%) + 6,812,142 stalled-cycles-backend:u # 0.87% backend cycles idle (74.23%) + 1,389,413,627 instructions:u # 1.78 insn per cycle + # 0.00 stalled cycles per insn (74.40%) + 0.368235043 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,44 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015847e+00 -Avg ME (F77/C++) = 2.0158474864438176 -Relative difference = 2.4130988992271984e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.642094e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.969408e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.969408e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 2.363033 sec -INFO: No Floating Point Exceptions have been reported - 4,430,997,247 cycles # 1.870 GHz - 8,493,309,798 instructions # 1.92 insn per cycle - 2.370917653 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2744) (512y: 4) (512z: 2754) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015848e+00 -Avg ME (F77/C++) = 2.0158476348733529 -Relative difference = 1.8112806478434436e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = +Avg ME (F77/GPU) = 2.0158466693246737 + File "", line 1 + me1=; me2=2.0158466693246737; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 62fab95ac2..1ea27bcdfc 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,68 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:25:20 -DATE: 2024-09-15_12:16:43 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.370448e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.371457e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.004604e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.536026 sec -INFO: No Floating Point Exceptions have been reported - 2,214,409,122 cycles # 2.867 GHz - 3,163,292,335 instructions # 1.43 insn per cycle - 0.830149622 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception + 896,338,448 cycles:u # 2.396 GHz (75.28%) + 2,311,511 
stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.63%) + 7,798,060 stalled-cycles-backend:u # 0.87% backend cycles idle (75.19%) + 1,412,298,042 instructions:u # 1.58 insn per cycle + # 0.01 stalled cycles per insn (76.03%) + 0.410961939 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,174 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.015836e+00 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = Avg ME (F77/GPU) = 2.0158358639104246 -Relative difference = 6.751024171044779e-08 -OK (relative difference <= 5E-3) -========================================================================= 
-Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.787519e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.833765e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.833765e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 6.014473 sec -INFO: No Floating Point Exceptions have been reported - 17,675,190,561 cycles # 2.934 GHz - 46,198,484,525 instructions # 2.61 insn per cycle - 6.025789457 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359218686011 -Relative difference = 3.8758807327712803e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.209372e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.373008e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.373008e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.414727 sec -INFO: No Floating Point Exceptions have been reported - 10,062,586,014 cycles # 2.937 GHz - 27,715,049,037 instructions # 2.75 insn per cycle - 3.427097999 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359218686011 -Relative difference = 3.8758807327712803e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.030622e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.429639e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.429639e+05 ) sec^-1 -MeanMatrixElemValue = ( 
2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.231035 sec -INFO: No Floating Point Exceptions have been reported - 6,157,448,048 cycles # 2.747 GHz - 12,606,647,104 instructions # 2.05 insn per cycle - 2.242669652 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2777) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP 
precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.510413e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.987462e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.987462e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.049793 sec -INFO: No Floating Point Exceptions have been reported - 5,651,790,254 cycles # 2.742 GHz - 12,043,922,780 instructions # 2.13 insn per cycle - 2.061986198 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2522) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.559881e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.752268e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.752268e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.093339 sec -INFO: No Floating Point Exceptions have been reported - 5,777,925,002 cycles # 1.861 GHz - 8,230,989,757 instructions # 1.42 insn per cycle - 3.105063126 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1866) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + File "", line 1 + me1=; me2=2.0158358639104246; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index bde416a886..e441da7fac 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,68 +1,41 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-17_09:25:23 -DATE: 2024-09-15_12:17:08 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.300920e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.324653e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.961570e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.533607 sec -INFO: No Floating Point Exceptions have been reported - 2,198,649,887 cycles # 2.849 GHz - 3,081,934,025 instructions # 1.40 insn per cycle - 0.827713906 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception + 884,580,865 cycles:u # 2.372 GHz (73.62%) + 2,327,348 stalled-cycles-frontend:u # 0.26% frontend cycles idle (73.56%) + 5,617,735 stalled-cycles-backend:u # 0.64% backend cycles idle (75.26%) + 1,436,251,699 instructions:u # 1.62 insn per cycle + # 0.00 stalled cycles per insn (78.09%) + 0.411579403 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,174 +43,11 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.015836e+00 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = Avg ME (F77/GPU) = 2.0158358639104246 -Relative difference = 6.751024171044779e-08 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) 
-EvtsPerSec[Rmb+ME] (23) = ( 1.850472e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.899391e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.899391e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.814253 sec -INFO: No Floating Point Exceptions have been reported - 17,118,477,102 cycles # 2.939 GHz - 45,207,445,046 instructions # 2.64 insn per cycle - 5.826249043 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359218686011 -Relative difference = 3.8758807327712803e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.320488e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.495447e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.495447e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.305181 sec -INFO: No Floating Point Exceptions have been reported - 9,752,287,704 cycles # 2.941 GHz - 26,369,462,343 instructions # 2.70 insn per cycle - 3.316567159 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359218686011 -Relative difference = 3.8758807327712803e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.466984e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.783441e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.783441e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.496548 sec -INFO: No Floating Point Exceptions have been reported - 6,902,736,140 cycles # 2.753 GHz - 14,146,955,352 instructions # 2.05 insn per cycle - 2.508688639 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.747061e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.095585e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.095585e+05 ) sec^-1 -MeanMatrixElemValue = ( 
2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.355880 sec -INFO: No Floating Point Exceptions have been reported - 6,536,932,805 cycles # 2.762 GHz - 13,633,905,312 instructions # 2.09 insn per cycle - 2.367915662 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2531) (512y: 302) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP 
precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.589352e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.785622e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.785622e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.069165 sec -INFO: No Floating Point Exceptions have been reported - 5,741,871,289 cycles # 1.864 GHz - 9,325,593,834 instructions # 1.62 insn per cycle - 3.081760977 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2059) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED + File "", line 1 + me1=; me2=2.0158358639104246; reldif=abs((me2-me1)/me1); print('Relative 
difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) + ^ +SyntaxError: invalid syntax From abb441e9ef0e14cd87f60844456875a808ffc893 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 09:32:59 +0300 Subject: [PATCH 40/76] [amd] revert 96 tput logs on LUMI Revert "[amd] rerun 96 tput tests on LUMI - many issues at build time and at runtime" This reverts commit d12d08a6f385b7af47ad708eb29bdb6c9162d9a5. --- .../log_eemumu_mad_d_inl0_hrd0.txt | 248 +++++++++++-- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 265 ++++++++++++-- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 248 +++++++++++-- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 251 ++++++++++++-- .../log_eemumu_mad_d_inl0_hrd1.txt | 248 +++++++++++-- .../log_eemumu_mad_d_inl1_hrd0.txt | 248 +++++++++++-- .../log_eemumu_mad_d_inl1_hrd1.txt | 248 +++++++++++-- .../log_eemumu_mad_f_inl0_hrd0.txt | 248 +++++++++++-- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 265 ++++++++++++-- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 248 +++++++++++-- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 251 ++++++++++++-- .../log_eemumu_mad_f_inl0_hrd1.txt | 248 +++++++++++-- .../log_eemumu_mad_f_inl1_hrd0.txt | 248 +++++++++++-- .../log_eemumu_mad_f_inl1_hrd1.txt | 248 +++++++++++-- .../log_eemumu_mad_m_inl0_hrd0.txt | 248 +++++++++++-- .../log_eemumu_mad_m_inl0_hrd1.txt | 248 +++++++++++-- .../log_ggtt_mad_d_inl0_hrd0.txt | 248 +++++++++++-- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 265 ++++++++++++-- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 248 +++++++++++-- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 251 ++++++++++++-- .../log_ggtt_mad_d_inl0_hrd1.txt | 248 +++++++++++-- .../log_ggtt_mad_d_inl1_hrd0.txt | 248 +++++++++++-- .../log_ggtt_mad_d_inl1_hrd1.txt | 248 +++++++++++-- .../log_ggtt_mad_f_inl0_hrd0.txt | 248 +++++++++++-- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 265 ++++++++++++-- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 248 +++++++++++-- 
.../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 251 ++++++++++++-- .../log_ggtt_mad_f_inl0_hrd1.txt | 248 +++++++++++-- .../log_ggtt_mad_f_inl1_hrd0.txt | 248 +++++++++++-- .../log_ggtt_mad_f_inl1_hrd1.txt | 248 +++++++++++-- .../log_ggtt_mad_m_inl0_hrd0.txt | 248 +++++++++++-- .../log_ggtt_mad_m_inl0_hrd1.txt | 248 +++++++++++-- .../log_ggttg_mad_d_inl0_hrd0.txt | 269 ++++++++++++-- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 289 +++++++++++++--- .../log_ggttg_mad_d_inl0_hrd1.txt | 269 ++++++++++++-- .../log_ggttg_mad_f_inl0_hrd0.txt | 269 ++++++++++++-- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 289 +++++++++++++--- .../log_ggttg_mad_f_inl0_hrd1.txt | 269 ++++++++++++-- .../log_ggttg_mad_m_inl0_hrd0.txt | 269 ++++++++++++-- .../log_ggttg_mad_m_inl0_hrd1.txt | 269 ++++++++++++-- .../log_ggttgg_mad_d_inl0_hrd0.txt | 269 ++++++++++++-- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 289 +++++++++++++--- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 269 ++++++++++++-- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 274 ++++++++++++--- .../log_ggttgg_mad_d_inl0_hrd1.txt | 269 ++++++++++++-- .../log_ggttgg_mad_d_inl1_hrd0.txt | 269 ++++++++++++-- .../log_ggttgg_mad_d_inl1_hrd1.txt | 269 ++++++++++++-- .../log_ggttgg_mad_f_inl0_hrd0.txt | 269 ++++++++++++-- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 289 +++++++++++++--- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 269 ++++++++++++-- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 274 ++++++++++++--- .../log_ggttgg_mad_f_inl0_hrd1.txt | 269 ++++++++++++-- .../log_ggttgg_mad_f_inl1_hrd0.txt | 269 ++++++++++++-- .../log_ggttgg_mad_f_inl1_hrd1.txt | 269 ++++++++++++-- .../log_ggttgg_mad_m_inl0_hrd0.txt | 267 ++++++++++++-- .../log_ggttgg_mad_m_inl0_hrd1.txt | 267 ++++++++++++-- .../log_ggttggg_mad_d_inl0_hrd0.txt | 237 +++++++++---- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 261 ++++++++++---- .../log_ggttggg_mad_d_inl0_hrd1.txt | 237 +++++++++---- .../log_ggttggg_mad_f_inl0_hrd0.txt | 251 ++++++++++---- 
.../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 275 ++++++++++----- .../log_ggttggg_mad_f_inl0_hrd1.txt | 251 ++++++++++---- .../log_ggttggg_mad_m_inl0_hrd0.txt | 233 +++++++++---- .../log_ggttggg_mad_m_inl0_hrd1.txt | 233 +++++++++---- .../log_gqttq_mad_d_inl0_hrd0.txt | 293 ++++++++++------ .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 313 +++++++++++------ .../log_gqttq_mad_d_inl0_hrd1.txt | 293 ++++++++++------ .../log_gqttq_mad_f_inl0_hrd0.txt | 307 +++++++++------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 327 +++++++++++------- .../log_gqttq_mad_f_inl0_hrd1.txt | 307 +++++++++------- .../log_gqttq_mad_m_inl0_hrd0.txt | 293 ++++++++++------ .../log_gqttq_mad_m_inl0_hrd1.txt | 293 ++++++++++------ .../log_heftggbb_mad_d_inl0_hrd0.txt | 248 +++++++++++-- .../log_heftggbb_mad_d_inl0_hrd1.txt | 248 +++++++++++-- .../log_heftggbb_mad_f_inl0_hrd0.txt | 254 ++++++++++++-- .../log_heftggbb_mad_f_inl0_hrd1.txt | 258 ++++++++++++-- .../log_heftggbb_mad_m_inl0_hrd0.txt | 256 ++++++++++++-- .../log_heftggbb_mad_m_inl0_hrd1.txt | 256 ++++++++++++-- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 269 ++++++++++++-- .../log_smeftggtttt_mad_d_inl0_hrd1.txt | 269 ++++++++++++-- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 269 ++++++++++++-- .../log_smeftggtttt_mad_f_inl0_hrd1.txt | 269 ++++++++++++-- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 269 ++++++++++++-- .../log_smeftggtttt_mad_m_inl0_hrd1.txt | 269 ++++++++++++-- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 246 +++++++++++-- .../log_susyggt1t1_mad_d_inl0_hrd1.txt | 246 +++++++++++-- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 248 +++++++++++-- .../log_susyggt1t1_mad_f_inl0_hrd1.txt | 248 +++++++++++-- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 246 +++++++++++-- .../log_susyggt1t1_mad_m_inl0_hrd1.txt | 246 +++++++++++-- .../log_susyggtt_mad_d_inl0_hrd0.txt | 248 +++++++++++-- .../log_susyggtt_mad_d_inl0_hrd1.txt | 248 +++++++++++-- .../log_susyggtt_mad_f_inl0_hrd0.txt | 248 +++++++++++-- .../log_susyggtt_mad_f_inl0_hrd1.txt | 248 
+++++++++++-- .../log_susyggtt_mad_m_inl0_hrd0.txt | 246 +++++++++++-- .../log_susyggtt_mad_m_inl0_hrd1.txt | 246 +++++++++++-- 96 files changed, 21020 insertions(+), 4040 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index e7b206ce0f..1292ed24b8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:02:59 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:08:03 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.330379e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.527996e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.788543e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.812638 sec +INFO: No Floating Point Exceptions have been reported + 2,711,766,628 cycles # 2.867 GHz + 4,239,903,132 instructions # 1.56 insn per cycle + 1.138564764 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.032481e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.205909e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.205909e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.541164 sec +INFO: No Floating Point Exceptions have been reported + 19,214,248,144 cycles # 2.933 GHz 
+ 46,179,436,349 instructions # 2.40 insn per cycle + 6.552095575 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] 
(23) = ( 1.566602e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.052859e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.052859e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.470271 sec +INFO: No Floating Point Exceptions have been reported + 13,145,357,361 cycles # 2.934 GHz + 31,720,883,797 instructions # 2.41 insn per cycle + 4.481479023 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.961947e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.743984e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.743984e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.647893 sec +INFO: No Floating Point Exceptions have been reported + 10,212,054,728 cycles # 2.792 GHz + 19,686,910,587 instructions # 1.93 insn per cycle + 3.658422867 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 838,411,756 cycles:u # 0.390 GHz (74.20%) - 2,596,463 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.07%) - 6,168,888 stalled-cycles-backend:u # 0.74% backend cycles idle (75.24%) - 1,307,882,314 instructions:u # 1.56 insn per cycle - # 0.00 stalled cycles per insn (75.11%) - 2.974088628 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.012892e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.837375e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.837375e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.568887 sec +INFO: No Floating Point Exceptions have been reported + 10,042,390,879 cycles # 2.806 GHz + 19,342,891,969 instructions # 1.93 insn per cycle + 3.579550757 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828039868165208E-002 - File "", line 1 - me1=; me2=1.2828039868165208E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.687611e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.233598e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.233598e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.179012 sec +INFO: No Floating Point Exceptions have been reported + 8,766,087,418 cycles # 2.093 GHz + 15,826,503,490 instructions # 1.81 insn per cycle + 4.190350116 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 7e766b1c09..656f6e2f98 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,45 +1,77 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:16:58 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:45:05 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 2,173,251,407 cycles:u # 2.752 GHz (74.86%) - 11,405,504 stalled-cycles-frontend:u # 0.52% frontend cycles idle (75.61%) - 550,647,183 stalled-cycles-backend:u # 25.34% backend cycles idle (75.60%) - 2,544,263,226 instructions:u # 1.17 insn per cycle - # 0.22 stalled cycles per insn (74.64%) - 0.825203734 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.206256e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.682542e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.682542e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.457281 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 7,617,652,543 cycles # 2.830 GHz + 12,995,599,451 instructions # 1.71 insn per cycle + 2.778749807 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -47,11 +79,184 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828039868165208E-002 - File "", line 1 - me1=; me2=1.2828039868165208E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 9.578808e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.114252e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.114252e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 7.297311 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 20,775,260,342 cycles # 2.853 GHz + 46,581,102,942 instructions # 2.24 insn per cycle + 7.320240357 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.428539e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.841895e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.841895e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 5.145544 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 14,656,531,148 cycles # 2.850 GHz + 32,719,868,481 instructions # 2.23 insn per cycle + 5.168221991 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.782197e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.438081e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.438081e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.265326 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 11,667,988,214 cycles # 2.728 GHz + 21,208,810,330 instructions # 1.82 insn per cycle + 4.287433901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.824257e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.501728e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.501728e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.177936 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 11,439,960,592 cycles # 2.728 GHz + 20,869,154,642 instructions # 1.82 insn per cycle + 4.198165961 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.555883e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.022583e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.022583e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.778023 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,270,194,102 cycles # 2.143 GHz + 17,125,695,085 instructions # 1.67 insn per cycle + 4.797944534 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 09f746db11..c883b5b3b2 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:19:34 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:57:26 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.026987e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.683583e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.857936e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 1.462390 sec +INFO: No Floating Point Exceptions have been reported + 4,897,119,539 cycles # 2.892 GHz + 7,502,819,293 instructions # 1.53 insn per cycle + 1.751403177 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.029304e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.201451e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.201451e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 7.039422 sec +INFO: No Floating Point Exceptions have been reported + 20,621,489,655 cycles # 2.926 GHz + 46,653,049,885 instructions # 2.26 insn per cycle + 7.049549267 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.563131e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.046448e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.046448e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.950021 sec +INFO: No Floating Point Exceptions have been reported + 14,503,935,115 cycles # 2.925 GHz + 32,091,166,775 instructions # 2.21 insn per cycle + 4.960008276 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.961733e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.745410e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.745410e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.125348 sec +INFO: No Floating Point Exceptions have been reported + 11,625,768,108 cycles # 2.812 GHz + 19,969,403,537 instructions # 1.72 insn per cycle + 4.135325887 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 1,946,666,904 cycles:u # 2.786 GHz (74.38%) - 6,463,698 stalled-cycles-frontend:u # 0.33% frontend cycles idle (75.50%) - 
541,991,512 stalled-cycles-backend:u # 27.84% backend cycles idle (76.24%) - 2,053,991,122 instructions:u # 1.06 insn per cycle - # 0.26 stalled cycles per insn (75.26%) - 0.732014876 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.013648e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.832922e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.832922e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.042328 sec +INFO: No Floating Point Exceptions have been reported + 11,410,026,502 cycles # 2.817 GHz + 19,423,165,232 instructions # 1.70 insn per cycle + 4.052144921 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828039868165208E-002 - File "", line 1 - me1=; me2=1.2828039868165208E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = 
DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.712486e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.267405e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.267405e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.597004 sec +INFO: No Floating Point Exceptions have been reported + 10,166,885,638 cycles # 2.209 GHz + 15,890,691,650 instructions # 1.56 insn per cycle + 4.606740948 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 45bc3a09d5..1574c6c3cf 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,42 +1,200 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:19:01 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:51:49 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.996142e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.634832e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.796610e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.900321 sec +INFO: No Floating Point Exceptions have been reported + 6,197,801,426 cycles # 2.910 GHz + 11,411,789,503 instructions # 1.84 insn per cycle + 2.187318710 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.025883e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.199963e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.199963e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.587430 sec +INFO: No Floating Point Exceptions have been reported + 19,281,256,149 cycles # 
2.928 GHz + 46,192,094,635 instructions # 2.40 insn per cycle + 6.597986195 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] 
+EvtsPerSec[Rmb+ME] (23) = ( 1.548679e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.019540e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019540e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.521143 sec +INFO: No Floating Point Exceptions have been reported + 13,278,378,525 cycles # 2.931 GHz + 31,736,760,460 instructions # 2.39 insn per cycle + 4.531295049 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.952371e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.722642e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.722642e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.672885 sec +INFO: No Floating Point Exceptions have been reported + 10,228,932,843 cycles # 2.778 GHz + 19,706,958,837 instructions # 1.93 insn per cycle + 3.682647007 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 2,109,189,693 cycles:u # 2.839 GHz (75.50%) - 11,350,976 stalled-cycles-frontend:u # 0.54% frontend cycles idle (75.26%) - 553,134,176 stalled-cycles-backend:u # 26.22% backend cycles idle (74.75%) - 2,446,396,221 instructions:u # 1.16 insn per cycle - # 0.23 stalled cycles per insn (74.58%) - 0.768256285 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.003393e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.813864e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.813864e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.587790 sec +INFO: No Floating Point Exceptions have been reported + 10,033,694,863 cycles # 2.790 GHz + 19,370,562,804 instructions # 1.93 insn per cycle + 3.597832664 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -44,11 +202,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828039868165208E-002 - File "", line 1 - me1=; me2=1.2828039868165208E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.709775e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.272417e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.272417e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.131241 sec +INFO: No Floating Point Exceptions have been reported + 8,787,275,470 cycles # 2.123 GHz + 15,836,849,319 instructions # 1.80 insn per cycle + 4.141346630 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index a865a53954..3b02782d45 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:03:05 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:08:35 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.564401e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.700588e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.875805e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.684524 sec +INFO: No Floating Point Exceptions have been reported + 2,669,304,279 cycles # 2.860 GHz + 4,081,785,887 instructions # 1.53 insn per cycle + 0.991787338 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.030249e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.204005e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204005e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.555775 sec +INFO: No Floating Point Exceptions have been reported + 19,264,579,235 cycles # 2.934 GHz + 46,142,725,089 instructions # 2.40 insn per cycle + 6.567398103 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 452) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.561230e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.041001e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.041001e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.486484 sec +INFO: No Floating Point Exceptions have been reported + 13,180,735,522 cycles # 2.931 GHz + 31,698,753,932 instructions # 2.40 insn per cycle + 4.497601224 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1649) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.959717e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.742175e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.742175e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.661759 sec +INFO: No Floating Point Exceptions have been reported + 10,296,251,645 cycles # 2.804 GHz + 19,686,624,933 instructions # 1.91 insn per cycle + 3.673145773 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1895) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception - 790,069,371 cycles:u # 2.227 GHz (75.28%) - 2,433,436 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.64%) - 6,944,825 
stalled-cycles-backend:u # 0.88% backend cycles idle (75.52%) - 1,353,780,506 instructions:u # 1.71 insn per cycle - # 0.01 stalled cycles per insn (75.62%) - 0.392071241 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.002735e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.826033e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.826033e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.593177 sec +INFO: No Floating Point Exceptions have been reported + 10,082,197,083 cycles # 2.798 GHz + 19,384,360,663 instructions # 1.92 insn per cycle + 3.604587412 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1636) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828039868165216E-002 - File "", line 1 - me1=; me2=1.2828039868165216E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.753760e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.344313e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.344313e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.042847 sec +INFO: No Floating Point Exceptions have been reported + 8,657,274,459 cycles # 2.136 GHz + 15,708,080,882 instructions # 1.81 insn per cycle + 4.054289402 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 834) (512y: 156) (512z: 1237) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 7072c1ce28..9adc226af5 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:11:36 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:33:55 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.203471e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.505439e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.793875e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.692895 sec +INFO: No Floating Point Exceptions have been reported + 2,667,104,303 cycles # 2.870 GHz + 4,197,568,068 instructions # 1.57 insn per cycle + 0.991023921 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.609773e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.065122e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.065122e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.323744 sec +INFO: No Floating Point Exceptions have been reported + 12,680,899,676 cycles # 2.930 GHz 
+ 32,573,373,461 instructions # 2.57 insn per cycle + 4.329822925 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 281) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] 
(23) = ( 2.020941e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.881765e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.881765e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.528541 sec +INFO: No Floating Point Exceptions have been reported + 10,343,960,768 cycles # 2.928 GHz + 24,660,363,232 instructions # 2.38 insn per cycle + 3.534351751 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.219408e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.252697e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.252697e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.248651 sec +INFO: No Floating Point Exceptions have been reported + 9,122,079,188 cycles # 2.804 GHz + 16,949,443,243 instructions # 1.86 insn per cycle + 3.254977824 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1616) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe: Floating point exception - 804,560,077 cycles:u # 2.249 GHz (73.97%) - 2,419,360 stalled-cycles-frontend:u # 0.30% frontend cycles idle (73.78%) - 7,569,528 stalled-cycles-backend:u # 0.94% backend cycles idle (75.57%) - 1,402,555,928 instructions:u # 1.74 insn per cycle - # 0.01 stalled cycles per insn (73.86%) - 0.394680056 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.281631e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.372235e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.372235e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.167737 sec +INFO: No Floating Point Exceptions have been reported + 8,922,630,281 cycles # 2.812 GHz + 16,368,012,425 instructions # 1.83 insn per cycle + 3.174211351 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1352) (512y: 139) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828039868165208E-002 - File "", line 1 - me1=; me2=1.2828039868165208E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.953386e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.685128e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.685128e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.635921 sec +INFO: No Floating Point Exceptions have been reported + 7,907,839,436 cycles # 2.172 GHz + 14,593,864,068 instructions # 1.85 insn per cycle + 3.642895717 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1003) (512y: 158) (512z: 955) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index b1c8c8b726..a111e191c2 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:11:38 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:34:21 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.369824e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.600320e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.803756e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.688866 sec +INFO: No Floating Point Exceptions have been reported + 2,687,883,365 cycles # 2.879 GHz + 4,137,672,828 instructions # 1.54 insn per cycle + 0.991982760 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.085552e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.924035e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.924035e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.430840 sec +INFO: No Floating Point Exceptions have been reported + 10,018,256,596 cycles # 2.916 GHz 
+ 25,507,694,274 instructions # 2.55 insn per cycle + 3.436494229 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 236) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] 
(23) = ( 2.371301e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.639987e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.639987e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.073954 sec +INFO: No Floating Point Exceptions have been reported + 9,025,495,783 cycles # 2.931 GHz + 21,478,170,721 instructions # 2.38 insn per cycle + 3.080490687 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1100) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868164916E-002 +Relative difference = 1.0277102699700292e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.348565e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.529213e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.529213e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.091062 sec +INFO: No Floating Point Exceptions have been reported + 8,721,037,733 cycles # 2.816 GHz + 15,901,191,500 instructions # 1.82 insn per cycle + 3.097416237 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1489) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe: Floating point exception - 820,698,066 cycles:u # 2.317 GHz (75.60%) - 2,218,614 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.41%) - 6,296,901 stalled-cycles-backend:u # 0.77% backend cycles idle (74.13%) - 1,383,289,627 instructions:u # 1.69 insn per cycle - # 0.00 stalled cycles per insn (74.13%) - 0.391638435 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.428348e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.696607e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.696607e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.003427 sec +INFO: No Floating Point Exceptions have been reported + 8,472,649,935 cycles # 2.816 GHz + 15,622,192,614 instructions # 1.84 insn per cycle + 3.009695803 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1264) (512y: 141) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828039868165216E-002 - File "", line 1 - me1=; me2=1.2828039868165216E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.053185e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.879921e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.879921e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.481285 sec +INFO: No Floating Point Exceptions have been reported + 7,632,139,448 cycles # 2.189 GHz + 14,304,829,590 instructions # 1.87 insn per cycle + 3.488200715 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1032) (512y: 164) (512z: 877) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 4db4cc248b..bc5233a5ba 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:03:07 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:09:06 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.192132e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.336696e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.300693e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.585451 sec +INFO: No Floating Point Exceptions have been reported + 2,336,870,734 cycles # 2.880 GHz + 3,644,936,097 instructions # 1.56 insn per cycle + 0.870296260 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.072651e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.269381e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269381e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.255061 sec +INFO: No Floating Point Exceptions have been reported + 18,355,246,050 cycles # 2.931 GHz + 
45,043,077,667 instructions # 2.45 insn per cycle + 6.263286658 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) 
= ( 2.241713e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.430745e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.430745e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.200925 sec +INFO: No Floating Point Exceptions have been reported + 9,386,491,422 cycles # 2.926 GHz + 22,329,398,339 instructions # 2.38 insn per cycle + 3.208910381 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.404117e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.697492e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.697492e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.006682 sec +INFO: No Floating Point Exceptions have been reported + 8,484,958,572 cycles # 2.815 GHz + 15,797,352,563 instructions # 1.86 insn per cycle + 3.014624816 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 766,476,010 cycles:u # 2.323 GHz (74.78%) - 2,430,705 stalled-cycles-frontend:u # 0.32% frontend cycles idle (75.86%) - 6,550,118 stalled-cycles-backend:u # 0.85% backend cycles idle (76.34%) - 1,341,880,376 instructions:u # 1.75 insn per cycle - # 0.00 stalled cycles per insn (75.25%) - 0.390122279 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.426806e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765840e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765840e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.982618 sec +INFO: No Floating Point Exceptions have been reported + 8,401,165,701 cycles # 2.811 GHz + 15,653,777,374 instructions # 1.86 insn per cycle + 2.990373231 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828036060454906E-002 - File "", line 1 - me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.426463e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.722731e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.722731e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.987827 sec +INFO: No Floating Point Exceptions have been reported + 6,753,744,387 cycles # 2.255 GHz + 12,906,211,238 instructions # 1.91 insn per cycle + 2.995926915 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 18f7684bf7..95b8681521 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,45 +1,77 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:17:00 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:45:42 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 2,121,384,504 cycles:u # 2.853 GHz (75.66%) - 19,652,587 stalled-cycles-frontend:u # 0.93% frontend cycles idle (74.90%) - 551,623,680 stalled-cycles-backend:u # 26.00% backend cycles idle (74.38%) - 2,447,211,339 instructions:u # 1.15 insn per cycle - # 0.23 stalled cycles per insn (74.70%) - 0.779212203 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.076713e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.378449e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.378449e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.944337 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,200,765,179 cycles # 2.831 GHz + 10,073,714,089 instructions # 1.62 insn per cycle + 2.274311561 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -47,11 +79,184 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828036060454906E-002 - File "", line 1 - me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.013177e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.196081e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.196081e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.753252 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 19,140,260,721 cycles # 2.851 GHz + 45,281,984,182 instructions # 2.37 insn per cycle + 6.770979415 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.075255e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.101362e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.101362e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.587325 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,243,661,246 cycles # 2.856 GHz + 23,736,113,257 instructions # 2.32 insn per cycle + 3.601313820 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.208611e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.299964e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.299964e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.407535 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 9,325,104,909 cycles # 2.739 GHz + 16,992,883,294 instructions # 1.82 insn per cycle + 3.420829574 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.223475e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.357947e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.357947e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.391442 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 9,279,675,392 cycles # 2.737 GHz + 16,862,711,706 instructions # 1.82 insn per cycle + 3.405369019 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.268012e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.359028e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.359028e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 3.328677 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 7,543,875,114 cycles # 2.260 GHz + 14,180,005,728 instructions # 1.88 insn per cycle + 3.340981281 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index e386e5b810..15fa7d3112 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:19:37 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:58:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.336762e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.510278e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.479828e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 +TOTAL : 1.311031 sec +INFO: No Floating Point Exceptions have been reported + 4,430,467,531 cycles # 2.889 GHz + 6,960,795,222 instructions # 1.57 insn per cycle + 1.590735457 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR 
('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.075223e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.270769e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270769e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 6.687798 sec +INFO: No Floating Point Exceptions have been reported + 19,629,378,634 cycles # 2.933 GHz + 45,588,143,016 instructions # 2.32 insn per cycle + 6.694819566 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.248280e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.433718e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.433718e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 3.634348 sec +INFO: No Floating Point Exceptions have been reported + 10,674,006,930 cycles # 2.932 GHz + 22,771,305,471 instructions # 2.13 insn per cycle + 3.641620548 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.377112e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.647798e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.647798e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 3.480978 sec +INFO: No Floating Point Exceptions have been reported + 9,770,888,729 cycles # 2.802 GHz + 16,055,948,307 instructions # 1.64 insn per cycle + 3.487814115 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 1,910,318,117 cycles:u # 2.899 GHz (75.03%) - 14,858,846 stalled-cycles-frontend:u # 0.78% frontend cycles idle (76.42%) - 
549,077,433 stalled-cycles-backend:u # 28.74% backend cycles idle (75.40%) - 2,015,487,344 instructions:u # 1.06 insn per cycle - # 0.27 stalled cycles per insn (75.11%) - 0.688448405 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.421430e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.770475e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.770475e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 3.434581 sec +INFO: No Floating Point Exceptions have been reported + 9,740,474,003 cycles # 2.831 GHz + 15,722,386,015 instructions # 1.61 insn per cycle + 3.441655213 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828036060454906E-002 - File "", line 1 - me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = 
FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.441505e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.731463e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.731463e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 3.419856 sec +INFO: No Floating Point Exceptions have been reported + 8,031,724,309 cycles # 2.344 GHz + 12,960,768,751 instructions # 1.61 insn per cycle + 3.427508239 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index 116cdbf3e7..6589d6b6fa 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,42 +1,200 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:19:03 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:52:21 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.003631e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.641927e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.671119e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.488823 sec +INFO: No Floating Point Exceptions have been reported + 4,967,613,517 cycles # 2.899 GHz + 9,171,831,308 instructions # 1.85 insn per cycle + 1.769936667 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.073262e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.267974e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.267974e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.250612 sec +INFO: No Floating Point Exceptions have been reported + 18,322,110,309 cycles # 
2.929 GHz + 45,051,388,062 instructions # 2.46 insn per cycle + 6.257775360 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] 
+EvtsPerSec[Rmb+ME] (23) = ( 2.247339e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.424409e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.424409e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.190677 sec +INFO: No Floating Point Exceptions have been reported + 9,359,387,266 cycles # 2.928 GHz + 22,331,498,291 instructions # 2.39 insn per cycle + 3.197654484 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.363785e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.679448e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.679448e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.047896 sec +INFO: No Floating Point Exceptions have been reported + 8,583,132,130 cycles # 2.811 GHz + 15,806,350,534 instructions # 1.84 insn per cycle + 3.054826008 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 2,081,336,434 cycles:u # 2.902 GHz (75.47%) - 20,574,722 stalled-cycles-frontend:u # 0.99% frontend cycles idle (75.72%) - 556,746,417 stalled-cycles-backend:u # 26.75% backend cycles idle (75.25%) - 2,423,831,594 instructions:u # 1.16 insn per cycle - # 0.23 stalled cycles per insn (74.31%) - 0.741698760 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.430638e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.771124e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.771124e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.974287 sec +INFO: No Floating Point Exceptions have been reported + 8,401,059,167 cycles # 2.818 GHz + 15,651,581,046 instructions # 1.86 insn per cycle + 2.981875735 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -44,11 +202,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828036060454906E-002 - File "", line 1 - me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.438697e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.736996e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.736996e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.973534 sec +INFO: No Floating Point Exceptions have been reported + 6,722,109,548 cycles # 2.256 GHz + 12,906,606,049 instructions # 1.92 insn per cycle + 2.981153680 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index ff44ec4aeb..bed528f6e7 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:03:09 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:09:33 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.185244e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.645179e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.802907e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.581560 sec +INFO: No Floating Point Exceptions have been reported + 2,321,713,730 cycles # 2.869 GHz + 3,648,873,879 instructions # 1.57 insn per cycle + 0.865547071 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.067642e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.262437e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.262437e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.278398 sec +INFO: No Floating Point Exceptions have been reported + 18,394,901,899 cycles # 2.927 GHz + 45,013,341,285 instructions # 2.45 insn per cycle + 6.286516700 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 397) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.249815e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.432877e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.432877e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.188261 sec +INFO: No Floating Point Exceptions have been reported + 9,382,779,388 cycles # 2.937 GHz + 22,291,184,899 instructions # 2.38 insn per cycle + 3.196123670 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1939) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.394804e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.683014e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.683014e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.016616 sec +INFO: No Floating Point Exceptions have been reported + 8,501,260,075 cycles # 2.812 GHz + 15,791,303,131 instructions # 1.86 insn per cycle + 3.024850695 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2539) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception - 764,123,086 cycles:u # 2.338 GHz (74.12%) - 2,314,510 stalled-cycles-frontend:u # 0.30% frontend cycles idle (73.68%) - 6,276,540 
stalled-cycles-backend:u # 0.82% backend cycles idle (74.96%) - 1,379,052,675 instructions:u # 1.80 insn per cycle - # 0.00 stalled cycles per insn (75.94%) - 0.364419040 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.433401e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.784502e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.784502e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.973425 sec +INFO: No Floating Point Exceptions have been reported + 8,414,276,106 cycles # 2.823 GHz + 15,633,261,481 instructions # 1.86 insn per cycle + 2.981340876 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2436) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828036060454906E-002 - File "", line 1 - me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.449856e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.750896e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.750896e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.962493 sec +INFO: No Floating Point Exceptions have been reported + 6,702,761,235 cycles # 2.257 GHz + 12,885,740,598 instructions # 1.92 insn per cycle + 2.970728824 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1667) (512y: 18) (512z: 1428) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052564145764E-002 +Relative difference = 1.9988585667912256e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index f9b9273f50..711fbf3a50 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:11:41 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:34:45 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.272057e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.453757e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.411368e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.577320 sec +INFO: No Floating Point Exceptions have been reported + 2,322,367,280 cycles # 2.881 GHz + 3,616,476,077 instructions # 1.56 insn per cycle + 0.862621614 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.635657e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.129532e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.129532e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 4.218771 sec +INFO: No Floating Point Exceptions have been reported + 12,191,913,623 cycles # 2.887 GHz + 
32,293,306,178 instructions # 2.65 insn per cycle + 4.224304323 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 290) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039840314887E-002 +Relative difference = 1.244813035273009e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = 
( 2.654215e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.464911e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.464911e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.746425 sec +INFO: No Floating Point Exceptions have been reported + 8,013,864,577 cycles # 2.914 GHz + 18,725,751,725 instructions # 2.34 insn per cycle + 2.751635696 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1548) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039283704129E-002 +Relative difference = 5.583829420356249e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.734762e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.516819e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.516819e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.676523 sec +INFO: No Floating Point Exceptions have been reported + 7,476,186,846 cycles # 2.791 GHz + 14,257,923,546 instructions # 1.91 insn per cycle + 2.682062632 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053244447801E-002 +Relative difference = 2.5291823782248813e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe: Floating point exception - 791,028,835 cycles:u # 2.408 GHz (73.43%) - 2,343,117 stalled-cycles-frontend:u # 0.30% frontend cycles idle (73.58%) - 7,158,894 stalled-cycles-backend:u # 0.91% backend cycles idle (75.68%) - 1,370,401,779 instructions:u # 1.73 insn per cycle - # 0.01 stalled cycles per insn (75.79%) - 0.365233728 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.834242e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.778618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.778618e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.592675 sec +INFO: No Floating Point Exceptions have been reported + 7,344,696,907 cycles # 2.828 GHz + 13,952,931,831 instructions # 1.90 insn per cycle + 2.598198803 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2087) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828036060454906E-002 - File "", line 1 - me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053244447801E-002 +Relative difference = 2.5291823782248813e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.491060e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.875896e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.875896e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.903149 sec +INFO: No Floating Point Exceptions have been reported + 6,571,286,194 cycles # 2.260 GHz + 13,433,545,963 instructions # 2.04 insn per cycle + 2.908820313 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2077) (512y: 1) (512z: 1199) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052562326775E-002 +Relative difference = 1.997440588685788e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 5031f9b51b..6fc527ffa1 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:11:43 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:35:09 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.289380e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.618768e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.817817e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.580301 sec +INFO: No Floating Point Exceptions have been reported + 2,323,629,755 cycles # 2.848 GHz + 3,593,641,981 instructions # 1.55 insn per cycle + 0.873274895 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282802e-02 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.209448e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.220446e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.220446e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.220192 sec +INFO: No Floating Point Exceptions have been reported + 9,366,774,041 cycles # 2.905 GHz + 
25,702,432,609 instructions # 2.74 insn per cycle + 3.225730639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 243) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039838495897E-002 +Relative difference = 1.2589928273811243e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) 
= ( 3.014104e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.557363e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.557363e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.462960 sec +INFO: No Floating Point Exceptions have been reported + 7,216,847,131 cycles # 2.925 GHz + 16,891,846,951 instructions # 2.34 insn per cycle + 2.468502980 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1350) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.924187e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.020326e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.020326e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.522608 sec +INFO: No Floating Point Exceptions have been reported + 7,150,122,380 cycles # 2.829 GHz + 13,633,449,373 instructions # 1.91 insn per cycle + 2.528205937 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2061) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053220800939E-002 +Relative difference = 2.5107486628541925e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe: Floating point exception - 771,335,849 cycles:u # 2.360 GHz (75.28%) - 2,446,075 stalled-cycles-frontend:u # 0.32% frontend cycles idle (74.46%) - 7,240,951 stalled-cycles-backend:u # 0.94% backend cycles idle (75.56%) - 1,402,589,103 instructions:u # 1.82 insn per cycle - # 0.01 stalled cycles per insn (73.98%) - 0.368335826 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.976818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.175866e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.175866e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.485267 sec +INFO: No Floating Point Exceptions have been reported + 7,047,642,186 cycles # 2.830 GHz + 13,442,931,038 instructions # 1.91 insn per cycle + 2.490839699 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1940) (512y: 4) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828036060454906E-002 - File "", line 1 - me1=; me2=1.2828036060454906E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053220800939E-002 +Relative difference = 2.5107486628541925e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.604837e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.103202e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.103202e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.790744 sec +INFO: No Floating Point Exceptions have been reported + 6,349,721,778 cycles # 2.272 GHz + 13,164,680,615 instructions # 2.07 insn per cycle + 2.796235299 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2033) (512y: 1) (512z: 1085) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052536860923E-002 +Relative difference = 1.977588895209662e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index eb2412c3dc..caa67d1a4c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:03:12 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:10:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.610039e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.567106e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.762593e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.688497 sec +INFO: No Floating Point Exceptions have been reported + 2,665,329,968 cycles # 2.845 GHz + 4,055,682,627 instructions # 1.52 insn per cycle + 0.995154695 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.002881e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168467e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.168467e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.726627 sec +INFO: No Floating Point Exceptions have been reported + 19,724,365,546 cycles # 2.928 GHz + 
46,388,641,620 instructions # 2.35 insn per cycle + 6.737968541 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039952548879E-002 +Relative difference = 3.6990156841838714e-09 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) 
= ( 1.617185e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.143896e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.143896e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.347629 sec +INFO: No Floating Point Exceptions have been reported + 12,771,945,524 cycles # 2.931 GHz + 31,577,972,239 instructions # 2.47 insn per cycle + 4.359278192 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1719) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039952548879E-002 +Relative difference = 3.6990156841838714e-09 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.943978e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.720569e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.720569e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.688531 sec +INFO: No Floating Point Exceptions have been reported + 10,322,037,008 cycles # 2.790 GHz + 19,570,801,424 instructions # 1.90 insn per cycle + 3.699996508 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2042) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception - 801,797,045 cycles:u # 2.250 GHz (75.44%) - 2,260,290 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.18%) - 8,048,661 stalled-cycles-backend:u # 1.00% backend cycles idle (75.37%) - 1,409,315,454 instructions:u # 1.76 insn per cycle - # 0.01 stalled cycles per insn (73.94%) - 0.396528391 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.986657e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.789520e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.789520e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.618212 sec +INFO: No Floating Point Exceptions have been reported + 10,149,499,266 cycles # 2.797 GHz + 19,312,096,557 instructions # 1.90 insn per cycle + 3.629679706 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1785) (512y: 189) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828039901590281E-002 - File "", line 1 - me1=; me2=1.2828039901590281E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.777391e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.385382e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.385382e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.992684 sec +INFO: No Floating Point Exceptions have been reported + 8,588,251,503 cycles # 2.146 GHz + 15,161,251,122 instructions # 1.77 insn per cycle + 4.003805537 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 954) (512y: 154) (512z: 1322) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index 7523130c83..ce1b16067d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-17_09:03:14 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:10:30 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.695377e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.640031e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.828039e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.676129 sec +INFO: No Floating Point Exceptions have been reported + 2,632,225,960 cycles # 2.883 GHz + 4,132,384,248 instructions # 1.57 insn per cycle + 0.970493332 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.282804e-02 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.005676e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.176899e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.176899e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.735465 sec +INFO: No Floating Point Exceptions have been reported + 19,720,225,593 cycles # 2.924 GHz + 
46,326,489,596 instructions # 2.35 insn per cycle + 6.746197968 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039952548879E-002 +Relative difference = 3.6990156841838714e-09 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) 
= ( 1.574820e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.144081e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.144081e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.449866 sec +INFO: No Floating Point Exceptions have been reported + 13,065,779,841 cycles # 2.930 GHz + 31,555,443,434 instructions # 2.42 insn per cycle + 4.460852067 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1711) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039952548879E-002 +Relative difference = 3.6990156841838714e-09 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.952135e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.730440e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.730440e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.677079 sec +INFO: No Floating Point Exceptions have been reported + 10,320,566,663 cycles # 2.800 GHz + 19,557,785,526 instructions # 1.90 insn per cycle + 3.688245631 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2026) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception - 800,037,746 cycles:u # 2.261 GHz (74.89%) - 2,538,107 stalled-cycles-frontend:u # 0.32% frontend cycles idle (74.80%) - 6,828,914 stalled-cycles-backend:u # 0.85% backend cycles idle (75.63%) - 1,342,253,081 instructions:u # 1.68 insn per cycle - # 0.01 stalled cycles per insn (74.78%) - 0.391601204 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.981919e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.782784e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.782784e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.626089 sec +INFO: No Floating Point Exceptions have been reported + 10,150,645,903 cycles # 2.793 GHz + 19,388,040,023 instructions # 1.91 insn per cycle + 3.637342012 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1779) (512y: 189) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.2828039901590284E-002 - File "", line 1 - me1=; me2=1.2828039901590284E-002; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.806136e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.449559e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.449559e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.931921 sec +INFO: No Floating Point Exceptions have been reported + 8,442,748,276 cycles # 2.150 GHz + 15,068,523,446 instructions # 1.78 insn per cycle + 3.943167549 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 947) (512y: 156) (512z: 1306) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 11cb6d94bd..aeadfaae64 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:03:16 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:11:01 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.391981e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.330443e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.949573e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.534465 sec +INFO: No Floating Point Exceptions have been reported + 2,210,485,527 cycles # 2.869 GHz + 3,136,829,588 instructions # 1.42 insn per cycle + 0.828432932 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.818281e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.865529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.865529e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.911600 sec +INFO: No Floating Point Exceptions have been reported + 17,389,649,504 cycles # 2.935 GHz + 46,036,709,188 instructions 
# 2.65 insn per cycle + 5.925127688 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515649 +Relative difference = 3.258803992249869e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.165855e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
3.325075e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.325075e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.453922 sec +INFO: No Floating Point Exceptions have been reported + 10,171,046,914 cycles # 2.936 GHz + 27,937,548,503 instructions # 2.75 insn per cycle + 3.465600263 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515654 +Relative difference = 3.2588039900609506e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.967598e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.358232e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.358232e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.252729 sec +INFO: No Floating Point Exceptions have been reported + 6,219,848,194 cycles # 2.748 GHz + 12,677,070,824 instructions # 2.04 insn per cycle + 2.263945260 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 869,576,390 cycles:u # 0.583 GHz (75.13%) - 2,440,335 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.50%) - 6,206,852 stalled-cycles-backend:u # 0.71% backend cycles idle (75.25%) - 1,476,984,592 instructions:u # 1.70 insn per cycle - # 0.00 stalled cycles per insn (74.77%) - 1.545981711 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv 
= VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.478036e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.948706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.948706e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.055945 sec +INFO: No Floating Point Exceptions have been reported + 5,693,440,562 cycles # 2.756 GHz + 12,116,317,958 instructions # 2.13 insn per cycle + 2.067013514 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288063388516817 - File "", line 1 - me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.483396e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.667202e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.667202e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.151048 sec +INFO: No Floating Point Exceptions have been reported + 5,836,401,977 cycles # 1.846 GHz + 8,391,475,751 instructions # 1.44 insn per cycle + 3.162234928 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 
9c9e966a48..9022013b0c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,45 +1,77 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:17:03 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:46:12 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 2,249,015,642 cycles:u # 2.777 GHz (74.62%) - 11,048,040 stalled-cycles-frontend:u # 0.49% frontend cycles idle (74.43%) - 556,416,400 stalled-cycles-backend:u # 24.74% backend cycles idle (74.13%) - 2,529,763,771 instructions:u # 1.12 insn per cycle - # 0.22 stalled cycles per insn (74.78%) - 0.846028883 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.381047e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.782856e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.782856e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.840023 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,108,426,192 cycles # 2.861 GHz + 4,770,924,698 instructions # 1.53 insn per cycle + 1.146594198 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -47,11 +79,184 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288063388516817 - File "", line 1 - me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.806159e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.852377e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.852377e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 6.067320 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 17,856,813,573 cycles # 2.936 GHz + 46,243,571,751 instructions # 2.59 insn per cycle + 6.083398130 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515649 +Relative difference = 3.258803992249869e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.134236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.289007e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.289007e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.611222 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,640,342,598 cycles # 2.934 GHz + 28,274,377,614 instructions # 2.66 insn per cycle + 3.627839941 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515654 +Relative difference = 3.2588039900609506e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.918119e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.295386e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.295386e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.398079 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,685,318,792 cycles # 2.770 GHz + 13,122,453,026 instructions # 1.96 insn per cycle + 2.414568983 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.370026e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.821205e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.821205e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.218649 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,189,711,182 cycles # 2.770 GHz + 12,557,371,407 instructions # 2.03 insn per cycle + 2.235322482 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.469847e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.650575e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.650575e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.290950 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,320,487,640 cycles # 1.913 GHz + 8,791,643,966 instructions # 1.39 insn per cycle + 3.307886654 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index ac00831180..85f95aac4c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:19:39 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:58:30 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.424217e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.466051e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.011391e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 0.646400 sec +INFO: No Floating Point Exceptions have been reported + 2,528,248,856 cycles # 2.879 GHz + 3,688,196,917 instructions # 1.46 insn per cycle + 0.934986871 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.819736e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.867152e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.867152e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 5.985044 sec +INFO: No Floating Point Exceptions have been reported + 17,600,111,411 cycles # 2.936 GHz + 46,124,554,790 
instructions # 2.62 insn per cycle + 5.995774241 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515649 +Relative difference = 3.258803992249869e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.168097e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.326675e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.326675e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.533047 sec +INFO: No Floating Point Exceptions have been reported + 10,405,596,477 cycles # 2.937 GHz + 28,016,084,485 instructions # 2.69 insn per cycle + 3.543840924 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515654 +Relative difference = 3.2588039900609506e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = 
SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.000915e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.392912e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.392912e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.319991 sec +INFO: No Floating Point Exceptions have been reported + 6,443,899,577 cycles # 2.766 GHz + 12,743,367,354 instructions # 1.98 insn per cycle + 2.330731733 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 1,995,561,443 cycles:u # 2.801 GHz (74.88%) - 6,510,105 stalled-cycles-frontend:u # 0.33% frontend cycles idle (74.93%) - 549,438,216 stalled-cycles-backend:u # 27.53% backend cycles idle (74.45%) - 2,170,651,447 instructions:u # 1.09 insn per cycle - # 0.25 stalled cycles per insn (75.43%) - 0.742750781 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.453355e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.917864e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.917864e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.148765 sec +INFO: No Floating Point Exceptions have been reported + 5,965,699,512 cycles # 2.764 GHz + 12,146,978,501 instructions # 2.04 insn per cycle + 2.159407986 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288063388516817 - File "", line 1 - me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.499391e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.683844e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.683844e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.221377 sec +INFO: No Floating Point Exceptions have been reported + 6,080,803,082 cycles # 1.882 GHz + 8,423,087,351 instructions # 1.39 insn per cycle + 3.232264502 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 
15956dbce8..fa08fbada3 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,42 +1,200 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:19:06 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:52:49 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.726431e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.401767e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.003069e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.719407 sec +INFO: No Floating Point Exceptions have been reported + 2,718,962,196 cycles # 2.853 GHz + 4,261,744,999 instructions # 1.57 insn per cycle + 1.009279183 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.804236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.851320e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.851320e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.958188 sec +INFO: No Floating Point Exceptions have been reported + 17,386,557,790 cycles # 2.923 GHz + 46,053,036,463 
instructions # 2.65 insn per cycle + 5.968882862 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515649 +Relative difference = 3.258803992249869e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.150158e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.308857e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.308857e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.477756 sec +INFO: No Floating Point Exceptions have been reported + 10,179,732,087 cycles # 2.919 GHz + 27,956,952,229 instructions # 2.75 insn per cycle + 3.488776572 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515654 +Relative difference = 3.2588039900609506e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = 
SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.931728e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.313640e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.313640e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.272950 sec +INFO: No Floating Point Exceptions have been reported + 6,250,280,118 cycles # 2.738 GHz + 12,699,256,189 instructions # 2.03 insn per cycle + 2.283692354 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 2,195,815,390 cycles:u # 2.880 GHz (75.56%) - 10,892,913 stalled-cycles-frontend:u # 0.50% frontend cycles idle (75.02%) - 547,003,488 stalled-cycles-backend:u # 24.91% backend cycles idle (74.82%) - 2,513,157,300 instructions:u # 1.14 insn per cycle - # 0.22 stalled cycles per insn (74.55%) - 0.788757141 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.454076e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.923276e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.923276e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.069149 sec +INFO: No Floating Point Exceptions have been reported + 5,725,799,884 cycles # 2.754 GHz + 12,135,179,967 instructions # 2.12 insn per cycle + 2.079783939 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -44,11 +202,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288063388516817 - File "", line 1 - me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.414550e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.592343e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.592343e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.218110 sec +INFO: No Floating Point Exceptions have been reported + 5,959,079,250 cycles # 1.847 GHz + 8,422,189,176 instructions # 1.41 insn per cycle + 3.228977501 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index c0bd5870cd..328467ef63 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:03:20 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:11:27 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.368553e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.328924e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.965685e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.531897 sec +INFO: No Floating Point Exceptions have been reported + 2,204,349,816 cycles # 2.864 GHz + 3,169,634,690 instructions # 1.44 insn per cycle + 0.825649601 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.870268e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.919754e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.919754e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.735202 sec +INFO: No Floating Point Exceptions have been reported + 16,847,713,247 cycles # 2.933 GHz + 44,981,738,957 instructions 
# 2.67 insn per cycle + 5.744748484 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515649 +Relative difference = 3.258803992249869e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.300705e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
3.473658e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.473658e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.304655 sec +INFO: No Floating Point Exceptions have been reported + 9,659,083,497 cycles # 2.916 GHz + 26,749,720,361 instructions # 2.77 insn per cycle + 3.314062418 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2328) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515654 +Relative difference = 3.2588039900609506e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.583161e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.907950e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.907950e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.415079 sec +INFO: No Floating Point Exceptions have been reported + 6,675,336,151 cycles # 2.753 GHz + 14,174,925,457 instructions # 2.12 insn per cycle + 2.425332683 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2710) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception - 897,157,021 cycles:u # 2.392 GHz (75.81%) - 2,363,003 stalled-cycles-frontend:u # 0.26% frontend cycles idle (76.35%) - 5,504,469 stalled-cycles-backend:u # 0.61% backend cycles idle (74.94%) - 1,378,212,077 instructions:u # 1.54 insn per cycle - # 0.00 stalled cycles per insn (74.78%) - 0.411268664 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv 
= VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.674600e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.017717e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.017717e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.378233 sec +INFO: No Floating Point Exceptions have been reported + 6,574,155,578 cycles # 2.754 GHz + 13,789,180,928 instructions # 2.10 insn per cycle + 2.388565062 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2356) (512y: 297) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288063388516817 - File "", line 1 - me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.383516e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.554686e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.554686e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.229855 sec +INFO: No Floating Point Exceptions have been reported + 5,994,389,719 cycles # 1.851 GHz + 10,123,629,860 instructions # 1.69 insn per cycle + 3.240029027 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1273) (512y: 208) (512z: 1988) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 
42bc47a590..2da881e2b3 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:11:45 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:35:31 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.302471e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.316570e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001729e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.535501 sec +INFO: 
No Floating Point Exceptions have been reported + 2,215,078,191 cycles # 2.874 GHz + 3,154,679,095 instructions # 1.42 insn per cycle + 0.829146554 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.351910e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.430033e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.430033e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.571525 sec +INFO: No Floating Point Exceptions have been reported + 13,035,401,690 cycles # 2.848 GHz + 34,355,905,973 instructions # 2.64 insn per cycle + 4.578322526 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515649 +Relative difference = 3.258803992249869e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.974205e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.110138e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.110138e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.639722 sec +INFO: No Floating Point Exceptions have been reported + 10,720,308,622 cycles # 2.941 GHz + 24,027,850,859 instructions # 2.24 insn per cycle + 3.646936507 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515654 +Relative difference = 3.2588039900609506e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.622097e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.949479e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.949479e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 
) GeV^0 +TOTAL : 2.385273 sec +INFO: No Floating Point Exceptions have been reported + 6,607,425,584 cycles # 2.762 GHz + 12,368,604,074 instructions # 1.87 insn per cycle + 2.392729796 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3103) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe: Floating point exception - 913,485,140 cycles:u # 2.425 GHz (74.81%) - 2,415,774 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.94%) - 6,504,578 stalled-cycles-backend:u # 0.71% backend cycles idle (74.55%) - 1,437,465,825 instructions:u # 1.57 insn per cycle - # 0.00 stalled cycles per insn (72.92%) - 0.415170761 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.850394e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.302393e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.302393e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.279711 sec +INFO: No Floating Point Exceptions have been reported + 6,291,615,830 cycles # 2.752 GHz + 11,595,311,145 instructions # 1.84 insn per cycle + 2.287442889 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2648) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288063388516817 - File "", line 1 - me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.743982e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.952525e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.952525e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.921108 sec +INFO: No Floating Point Exceptions have been reported + 5,423,773,794 cycles # 1.852 GHz + 9,310,782,229 instructions # 1.72 insn per cycle + 2.929084188 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2096) (512y: 282) (512z: 1955) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 998edbd61e..86df224c90 
100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:11:48 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:35:55 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.255241e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.245059e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.949681e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.533151 sec +INFO: 
No Floating Point Exceptions have been reported + 2,204,451,822 cycles # 2.864 GHz + 3,124,206,943 instructions # 1.42 insn per cycle + 0.826948420 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.566402e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.659710e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.659710e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.199423 sec +INFO: No Floating Point Exceptions have been reported + 12,339,181,649 cycles # 2.934 GHz + 34,922,451,175 instructions # 2.83 insn per cycle + 4.206438418 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 430) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515649 +Relative difference = 3.258803992249869e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.977481e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.113067e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.113067e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.637534 sec +INFO: No Floating Point Exceptions have been reported + 10,710,920,290 cycles # 2.939 GHz + 23,032,620,692 instructions # 2.15 insn per cycle + 3.644897421 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2340) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388515654 +Relative difference = 3.2588039900609506e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.909385e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.279214e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.279214e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 
) GeV^0 +TOTAL : 2.252873 sec +INFO: No Floating Point Exceptions have been reported + 6,212,002,997 cycles # 2.749 GHz + 11,978,645,016 instructions # 1.93 insn per cycle + 2.260347594 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2491) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe: Floating point exception - 894,868,548 cycles:u # 2.368 GHz (73.84%) - 2,559,865 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.80%) - 6,911,754 stalled-cycles-backend:u # 0.77% backend cycles idle (73.68%) - 1,409,508,267 instructions:u # 1.58 insn per cycle - # 0.00 stalled cycles per insn (74.06%) - 0.415505974 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.039032e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.423530e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.423530e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.196002 sec +INFO: No Floating Point Exceptions have been reported + 6,062,556,643 cycles # 2.753 GHz + 11,146,456,018 instructions # 1.84 insn per cycle + 2.203425956 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2103) (512y: 174) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288063388516817 - File "", line 1 - me1=; me2=2.0288063388516817; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.871031e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.091641e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.091641e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.826553 sec +INFO: No Floating Point Exceptions have been reported + 5,265,704,314 cycles # 1.859 GHz + 9,046,022,125 instructions # 1.72 insn per cycle + 2.834187629 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1632) (512y: 208) (512z: 1571) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 23c1c53fc2..e1d11759a7 
100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:03:22 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:11:52 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.179768e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.708203e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.827426e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.491224 sec +INFO: 
No Floating Point Exceptions have been reported + 2,056,714,115 cycles # 2.865 GHz + 2,916,773,309 instructions # 1.42 insn per cycle + 0.776029796 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.918838e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.972832e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.972832e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.552302 sec +INFO: No Floating Point Exceptions have been reported + 16,247,282,670 cycles # 2.924 GHz + 45,328,928,537 instructions # 2.79 insn per cycle + 5.557963082 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.529514e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.866293e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.866293e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.402922 sec +INFO: No Floating Point Exceptions have been reported + 7,055,912,070 cycles # 2.931 GHz + 17,768,218,222 instructions # 2.52 insn per cycle + 2.408607319 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.299822e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.410195e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.410195e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) 
GeV^0 +TOTAL : 1.350007 sec +INFO: No Floating Point Exceptions have been reported + 3,747,874,250 cycles # 2.767 GHz + 8,260,976,747 instructions # 2.20 insn per cycle + 1.355686963 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 805,132,482 cycles:u # 2.412 GHz (74.89%) - 2,287,780 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.96%) - 6,669,480 stalled-cycles-backend:u # 0.83% backend cycles idle (76.06%) - 1,358,609,559 instructions:u # 1.69 insn per cycle - # 0.00 stalled cycles per insn (76.47%) - 0.372925518 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.794382e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.005480e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005480e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.278305 sec +INFO: No Floating Point Exceptions have been reported + 3,550,706,297 cycles # 2.767 GHz + 7,915,681,558 instructions # 2.23 insn per cycle + 1.284036639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288173687877133 - File "", line 1 - me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv 
= VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.489307e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.134354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.134354e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.705925 sec +INFO: No Floating Point Exceptions have been reported + 3,272,576,419 cycles # 1.913 GHz + 6,103,138,487 instructions # 1.86 insn per cycle + 1.712010321 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 
bdaeefcf25..4785fec175 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,45 +1,77 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:17:06 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:46:39 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 2,132,023,795 cycles:u # 2.859 GHz (75.45%) - 19,229,872 stalled-cycles-frontend:u # 0.90% frontend cycles idle (75.68%) - 542,899,446 stalled-cycles-backend:u # 25.46% backend cycles idle (75.08%) - 2,476,029,988 instructions:u # 1.16 insn per cycle - # 0.22 stalled cycles per insn (74.93%) - 0.780368458 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.985126e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.401873e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.401873e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.682896 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,622,267,762 cycles # 2.874 GHz + 4,057,326,622 instructions # 1.55 insn per cycle + 0.970912293 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -47,11 +79,184 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288173687877133 - File "", line 1 - me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.900194e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.953379e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953379e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.681942 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 16,671,667,536 cycles # 2.929 GHz + 45,497,192,820 instructions # 2.73 insn per cycle + 5.692941265 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.487658e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.829841e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.829841e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.506047 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 7,392,710,973 cycles # 2.938 GHz + 18,162,302,699 instructions # 2.46 insn per cycle + 2.517172183 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.160433e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.262111e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.262111e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.454756 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,077,667,192 cycles # 2.783 GHz + 8,611,395,195 instructions # 2.11 insn per cycle + 1.466232607 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.644244e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.901065e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.901065e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.382676 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,878,507,500 cycles # 2.784 GHz + 8,265,873,907 instructions # 2.13 insn per cycle + 1.393862906 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.387022e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.022226e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.022226e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.814510 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,594,832,163 cycles # 1.970 GHz + 6,462,220,806 instructions # 1.80 insn per cycle + 1.825958297 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 6e8a5eba1a..ff1a0d1a39 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:19:42 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:58:55 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.402978e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.823028e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.958083e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 +TOTAL : 0.595970 sec +INFO: No Floating Point Exceptions have been reported + 2,351,354,548 cycles # 2.876 GHz + 3,444,506,671 instructions # 1.46 insn per cycle + 0.875001238 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.915967e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.970379e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.970379e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 5.649395 sec +INFO: No Floating Point Exceptions have been reported + 16,580,294,518 cycles # 2.931 GHz + 45,471,304,380 
instructions # 2.74 insn per cycle + 5.657307639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.539990e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.880360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.880360e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 2.485589 sec +INFO: No Floating Point Exceptions have been reported + 7,332,359,110 cycles # 2.941 GHz + 17,888,678,821 instructions # 2.44 insn per cycle + 2.493593708 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = 
SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.267444e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.414148e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.414148e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.445011 sec +INFO: No Floating Point Exceptions have been reported + 4,026,136,405 cycles # 2.773 GHz + 8,355,233,205 instructions # 2.08 insn per cycle + 1.452615562 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 1,936,285,217 cycles:u # 2.897 GHz (74.84%) - 15,064,538 stalled-cycles-frontend:u # 0.78% frontend cycles idle (74.88%) - 545,299,393 stalled-cycles-backend:u # 28.16% backend cycles idle (75.04%) - 2,093,072,887 instructions:u # 1.08 insn per cycle - # 0.26 stalled cycles per insn (74.42%) - 0.700962558 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.729960e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.002290e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002290e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.378766 sec +INFO: No Floating Point Exceptions have been reported + 3,842,420,700 cycles # 2.773 GHz + 7,976,068,469 instructions # 2.08 insn per cycle + 1.386498519 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288173687877133 - File "", line 1 - me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.464492e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.118413e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.118413e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.801449 sec +INFO: No Floating Point Exceptions have been reported + 3,544,493,993 cycles # 1.960 GHz + 6,155,712,678 instructions # 1.74 insn per cycle + 1.809068044 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 
c3fba2dc3b..a7d3a3bcad 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,42 +1,200 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:19:09 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:53:15 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.603732e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.769962e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.882903e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.632677 sec +INFO: No Floating Point Exceptions have been reported + 2,459,588,784 cycles # 2.873 GHz + 3,830,411,115 instructions # 1.56 insn per cycle + 0.912203024 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.920423e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.975843e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.975843e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.564298 sec +INFO: No Floating Point Exceptions have been reported + 16,322,815,770 cycles # 2.930 GHz + 45,379,862,622 
instructions # 2.78 insn per cycle + 5.571986399 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.531426e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.870727e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.870727e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.417527 sec +INFO: No Floating Point Exceptions have been reported + 7,119,927,533 cycles # 2.937 GHz + 17,819,194,741 instructions # 2.50 insn per cycle + 2.425172051 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = 
SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.302342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.447707e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.447707e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.366211 sec +INFO: No Floating Point Exceptions have been reported + 3,809,953,710 cycles # 2.774 GHz + 8,311,255,796 instructions # 2.18 insn per cycle + 1.374030854 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 2,119,458,360 cycles:u # 2.954 GHz (74.36%) - 20,115,822 stalled-cycles-frontend:u # 0.95% frontend cycles idle (75.34%) - 552,306,849 stalled-cycles-backend:u # 26.06% backend cycles idle (75.48%) - 2,469,159,789 instructions:u # 1.16 insn per cycle - # 0.22 stalled cycles per insn (75.30%) - 0.742367314 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.793036e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.010397e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.010397e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.299199 sec +INFO: No Floating Point Exceptions have been reported + 3,629,527,941 cycles # 2.777 GHz + 7,964,563,950 instructions # 2.19 insn per cycle + 1.308355970 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -44,11 +202,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288173687877133 - File "", line 1 - me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.490800e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.161305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.161305e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.720915 sec +INFO: No Floating Point Exceptions have been reported + 3,334,705,650 cycles # 1.929 GHz + 6,144,839,228 instructions # 1.84 insn per cycle + 1.729518385 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 22666b8d44..1a7a19dcfe 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:03:24 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:12:13 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.207213e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.745203e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.857795e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.487469 sec +INFO: No Floating Point Exceptions have been reported + 2,053,225,612 cycles # 2.869 GHz + 2,963,841,312 instructions # 1.44 insn per cycle + 0.771871582 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.964254e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.021824e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.021824e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.438732 sec +INFO: No Floating Point Exceptions have been reported + 16,004,474,289 cycles # 2.939 GHz + 44,480,990,455 instructions # 
2.78 insn per cycle + 5.447104045 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 536) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.270854e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
5.739248e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.739248e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.093047 sec +INFO: No Floating Point Exceptions have been reported + 6,146,902,561 cycles # 2.927 GHz + 17,124,330,277 instructions # 2.79 insn per cycle + 2.101358753 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2864) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.010634e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.590643e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.590643e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.845256 sec +INFO: No Floating Point Exceptions have been reported + 5,092,053,198 cycles # 2.749 GHz + 10,266,716,383 instructions # 2.02 insn per cycle + 1.853336006 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3907) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception - 776,657,888 cycles:u # 2.335 GHz (75.89%) - 2,351,642 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.98%) - 12,095,370 stalled-cycles-backend:u # 1.56% backend cycles idle (76.30%) - 1,353,218,013 instructions:u # 1.74 insn per cycle - # 0.01 stalled cycles per insn (73.68%) - 0.372378571 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv 
= VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.078233e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.679535e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.679535e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.828978 sec +INFO: No Floating Point Exceptions have been reported + 5,049,904,876 cycles # 2.749 GHz + 10,046,122,437 instructions # 1.99 insn per cycle + 1.837563375 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3806) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288173687877133 - File "", line 1 - me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv 
= VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.649805e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.982891e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.982891e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.359308 sec +INFO: No Floating Point Exceptions have been reported + 4,441,902,341 cycles # 1.877 GHz + 8,494,262,942 instructions # 1.91 insn per cycle + 2.367884804 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2746) (512y: 4) (512z: 2754) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 807729e577..7bcb20b104 100644 --- 
a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:11:50 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:36:18 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.281805e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.729157e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.845967e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.487987 sec +INFO: 
No Floating Point Exceptions have been reported + 2,055,281,728 cycles # 2.872 GHz + 2,956,266,509 instructions # 1.44 insn per cycle + 0.772297546 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.496211e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.588163e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.588163e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.287732 sec +INFO: No Floating Point Exceptions have been reported + 12,584,199,997 cycles # 2.932 GHz + 34,606,962,286 instructions # 2.75 insn per cycle + 4.293417398 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288199094356969 +Relative difference = 4.463890496342449e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.317872e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.783743e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.783743e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.058410 sec +INFO: No Floating Point Exceptions have been reported + 6,058,288,486 cycles # 2.936 GHz + 14,847,536,122 instructions # 2.45 insn per cycle + 2.064093895 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2980) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193414453417 +Relative difference = 1.6829758681196702e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.131335e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.950373e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.950373e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) 
GeV^0 +TOTAL : 1.556955 sec +INFO: No Floating Point Exceptions have been reported + 4,316,973,163 cycles # 2.764 GHz + 9,053,302,579 instructions # 2.10 insn per cycle + 1.562583378 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4460) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181974319741 +Relative difference = 9.731379272303266e-08 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe: Floating point exception - 795,837,222 cycles:u # 2.403 GHz (73.91%) - 2,265,369 stalled-cycles-frontend:u # 0.28% frontend cycles idle (73.67%) - 7,926,590 stalled-cycles-backend:u # 1.00% backend cycles idle (74.48%) - 1,417,497,207 instructions:u # 1.78 insn per cycle - # 0.01 stalled cycles per insn (74.54%) - 0.369757999 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.308917e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.178890e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.178890e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.521257 sec +INFO: No Floating Point Exceptions have been reported + 4,205,210,775 cycles # 2.756 GHz + 8,662,511,141 instructions # 2.06 insn per cycle + 1.526851661 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4225) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288173687877133 - File "", line 1 - me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181974319741 +Relative difference = 9.731379272303266e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv 
= VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.372555e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.809988e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.809988e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.038768 sec +INFO: No Floating Point Exceptions have been reported + 3,837,026,814 cycles # 1.878 GHz + 7,805,330,859 instructions # 2.03 insn per cycle + 2.044464874 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4273) (512y: 0) (512z: 2558) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183246739209 +Relative difference = 1.6003107281264138e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index 7866be57c1..69afb6ef9f 100644 --- 
a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:11:52 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:36:38 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.211144e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.722563e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.857000e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.487617 sec +INFO: 
No Floating Point Exceptions have been reported + 2,060,698,152 cycles # 2.879 GHz + 2,912,783,409 instructions # 1.41 insn per cycle + 0.772686787 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.666171e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.771083e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.771083e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.019199 sec +INFO: No Floating Point Exceptions have been reported + 11,827,953,010 cycles # 2.940 GHz + 35,076,444,454 instructions # 2.97 insn per cycle + 4.024833106 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288199094356969 +Relative difference = 4.463890496342449e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.415928e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.902704e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.902704e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.023749 sec +INFO: No Floating Point Exceptions have been reported + 5,953,573,213 cycles # 2.935 GHz + 14,468,346,196 instructions # 2.43 insn per cycle + 2.029398775 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2559) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193583255634 +Relative difference = 1.7661780742548925e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.382613e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.271116e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.271116e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) 
GeV^0 +TOTAL : 1.507601 sec +INFO: No Floating Point Exceptions have been reported + 4,170,609,924 cycles # 2.758 GHz + 8,881,070,721 instructions # 2.13 insn per cycle + 1.513291878 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3570) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182104704902 +Relative difference = 1.0374044905426431e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe: Floating point exception - 783,007,068 cycles:u # 2.350 GHz (76.69%) - 2,438,873 stalled-cycles-frontend:u # 0.31% frontend cycles idle (76.02%) - 6,261,316 stalled-cycles-backend:u # 0.80% backend cycles idle (76.46%) - 1,322,368,341 instructions:u # 1.69 insn per cycle - # 0.00 stalled cycles per insn (75.14%) - 0.371799349 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.441367e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.356283e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.356283e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.495607 sec +INFO: No Floating Point Exceptions have been reported + 4,129,531,699 cycles # 2.752 GHz + 8,406,651,679 instructions # 2.04 insn per cycle + 1.501280641 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3296) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288173687877133 - File "", line 1 - me1=; me2=2.0288173687877133; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182104704902 +Relative difference = 1.0374044905426431e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv 
= VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.439343e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.891063e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.891063e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.014863 sec +INFO: No Floating Point Exceptions have been reported + 3,794,076,081 cycles # 1.879 GHz + 7,699,347,303 instructions # 2.03 insn per cycle + 2.020593600 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 2110) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183204829693 +Relative difference = 1.5796536184903122e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index fc42016bc7..5fcfefd8b1 100644 --- 
a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:03:27 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:12:35 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.360888e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.282446e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.948861e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.535947 sec +INFO: 
No Floating Point Exceptions have been reported + 2,206,968,400 cycles # 2.863 GHz + 3,177,366,447 instructions # 1.44 insn per cycle + 0.829346124 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.801369e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.847904e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.847904e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.967015 sec +INFO: No Floating Point Exceptions have been reported + 17,524,709,788 cycles # 2.932 GHz + 46,191,860,900 instructions # 2.64 insn per cycle + 5.978935443 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063903750300 +Relative difference = 3.0048445715164216e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.140467e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.299172e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.299172e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.487825 sec +INFO: No Floating Point Exceptions have been reported + 10,261,674,067 cycles # 2.934 GHz + 27,722,537,189 instructions # 2.70 insn per cycle + 3.498978005 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063903750300 +Relative difference = 3.0048445715164216e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.032137e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.431710e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.431710e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) 
GeV^0 +TOTAL : 2.231910 sec +INFO: No Floating Point Exceptions have been reported + 6,175,976,175 cycles # 2.753 GHz + 12,601,670,185 instructions # 2.04 insn per cycle + 2.244594220 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2774) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception - 895,738,310 cycles:u # 2.385 GHz (75.30%) - 2,284,852 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.56%) - 5,964,182 stalled-cycles-backend:u # 0.67% backend cycles idle (75.25%) - 1,435,614,704 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (74.89%) - 0.411223145 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.534616e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.015525e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.015525e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.042072 sec +INFO: No Floating Point Exceptions have been reported + 5,669,805,165 cycles # 2.764 GHz + 12,036,562,183 instructions # 2.12 insn per cycle + 2.054181107 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2519) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288063423243869 - File "", line 1 - me1=; me2=2.0288063423243869; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv 
= VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.587962e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.783361e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.783361e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.070999 sec +INFO: No Floating Point Exceptions have been reported + 5,754,052,644 cycles # 1.867 GHz + 8,225,264,257 instructions # 1.43 insn per cycle + 3.082911381 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1863) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index ee77467769..f3ccad1744 100644 
--- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:03:29 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:13:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.403841e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.350229e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.960040e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.532145 sec +INFO: 
No Floating Point Exceptions have been reported + 2,202,225,096 cycles # 2.860 GHz + 3,140,327,784 instructions # 1.43 insn per cycle + 0.826897706 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028807e+00 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.849779e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.898891e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.898891e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.812131 sec +INFO: No Floating Point Exceptions have been reported + 17,080,273,912 cycles # 2.934 GHz + 45,215,696,703 instructions # 2.65 insn per cycle + 5.823642020 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063903750300 +Relative difference = 3.0048445715164216e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.353337e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.532203e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.532203e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.272970 sec +INFO: No Floating Point Exceptions have been reported + 9,622,203,648 cycles # 2.930 GHz + 26,352,115,115 instructions # 2.74 insn per cycle + 3.284795843 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063903750300 +Relative difference = 3.0048445715164216e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.499939e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.814164e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.814164e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) 
GeV^0 +TOTAL : 2.448542 sec +INFO: No Floating Point Exceptions have been reported + 6,760,703,277 cycles # 2.754 GHz + 14,051,302,777 instructions # 2.08 insn per cycle + 2.455916079 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2896) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception - 914,849,134 cycles:u # 2.436 GHz (74.09%) - 2,451,232 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.59%) - 10,512,149 stalled-cycles-backend:u # 1.15% backend cycles idle (74.65%) - 1,396,328,721 instructions:u # 1.53 insn per cycle - # 0.01 stalled cycles per insn (74.70%) - 0.411717719 seconds time elapsed +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.725371e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.062901e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.062901e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.336085 sec +INFO: No Floating Point Exceptions have been reported + 6,433,871,158 cycles # 2.746 GHz + 13,544,684,713 instructions # 2.11 insn per cycle + 2.343643276 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2535) (512y: 302) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0288063423243869 - File "", line 1 - me1=; me2=2.0288063423243869; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv 
= VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.546154e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.730746e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.730746e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.074398 sec +INFO: No Floating Point Exceptions have been reported + 5,684,367,008 cycles # 1.845 GHz + 9,231,965,840 instructions # 1.62 insn per cycle + 3.081687192 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2060) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 17030efec2..0fe4cfc922 
100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-17_09:03:31 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:13:25 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 795,818,848 cycles:u # 0.778 GHz (75.10%) - 2,347,473 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.35%) - 12,569,594 
stalled-cycles-backend:u # 1.58% backend cycles idle (75.12%) - 1,353,252,025 instructions:u # 1.70 insn per cycle - # 0.01 stalled cycles per insn (75.16%) - 1.078805003 seconds time elapsed +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.646703e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.903323e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.008440e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.472515 sec +INFO: No Floating Point Exceptions have been reported + 1,976,395,629 cycles # 2.864 GHz + 2,853,369,004 instructions # 1.44 insn per cycle + 0.746437756 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.044065e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.229313e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.240372e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.613454 sec +INFO: No Floating Point Exceptions have been reported + 2,457,306,952 cycles # 2.873 GHz + 3,760,458,763 instructions # 1.53 insn per cycle + 0.914581816 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 2,017,653,309 cycles:u # 2.808 GHz (75.55%) - 2,464,363 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.50%) - 10,357,340 stalled-cycles-backend:u # 0.51% backend cycles idle (75.54%) - 2,432,704,139 instructions:u # 1.21 insn per cycle - # 0.00 stalled cycles per insn (74.82%) - 0.755151771 seconds time elapsed +Process = SIGMA_SM_GG_TTXG_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.423163e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.435086e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.435086e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.782254 sec +INFO: No Floating Point Exceptions have been reported + 19,933,068,888 cycles # 2.938 GHz + 59,910,639,029 instructions # 3.01 insn per cycle + 6.786428407 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.4131213684418644 - File "", line 1 - me1=; me2=1.4131213684418644; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.574256e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.615519e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.615519e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.601510 sec +INFO: No Floating Point Exceptions have been reported + 10,564,249,920 cycles # 2.931 GHz + 31,083,049,027 instructions # 2.94 insn per cycle + 3.605720194 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.109445e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.271688e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.271688e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.818340 sec +INFO: No Floating Point Exceptions have been reported + 4,995,758,651 cycles # 2.742 GHz + 11,404,411,821 instructions # 2.28 insn per cycle + 1.822613950 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.031962e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.052518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.052518e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.607425 sec +INFO: No Floating Point Exceptions have been reported + 4,440,179,427 cycles # 2.756 GHz + 10,663,032,994 instructions # 2.40 insn per cycle + 1.611644858 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 91) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.095641e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.193407e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.193407e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.331432 sec +INFO: No Floating Point Exceptions have been reported + 4,128,663,715 cycles # 1.768 GHz + 5,965,561,050 instructions # 1.44 insn per cycle + 2.335809030 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1617) (512y: 95) (512z: 3577) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 40c6e9f5ef..555f99fae8 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,59 +1,167 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: 
Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-17_09:17:09 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:47:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 828,494,905 cycles:u # 2.504 GHz (76.67%) - 2,589,636 stalled-cycles-frontend:u # 0.31% frontend cycles idle (76.65%) - 21,646,845 stalled-cycles-backend:u # 2.61% backend cycles idle (75.46%) - 1,335,042,676 instructions:u # 1.61 insn per cycle - # 0.02 stalled cycles per insn (75.25%) - 0.364821007 seconds time elapsed +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.462205e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.092748e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.092748e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.504368 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,053,925,551 cycles # 2.862 GHz + 3,120,835,610 instructions # 1.52 insn per cycle + 0.775288198 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 4,077,163,579 cycles:u # 3.002 GHz (75.22%) - 16,959,476 stalled-cycles-frontend:u # 0.42% frontend cycles idle (75.01%) - 837,378,519 stalled-cycles-backend:u # 20.54% backend cycles idle (75.14%) - 4,205,434,861 instructions:u # 1.03 insn per cycle - # 0.20 stalled cycles per insn (74.97%) - 1.391743442 seconds time elapsed +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.695563e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.383097e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.383097e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.834031 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,122,984,951 cycles # 2.884 GHz + 
5,028,895,726 instructions # 1.61 insn per cycle + 1.144542739 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.420974e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.433351e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.433351e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.797187 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 19,924,578,475 cycles # 2.930 GHz + 59,919,807,490 instructions # 3.01 insn per cycle + 6.801426045 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.519993e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.562146e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.562146e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.653302 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,746,732,815 cycles # 2.939 GHz + 31,134,499,346 instructions # 2.90 insn per cycle + 3.657616586 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -61,11 +169,114 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.4131213684418644 - File "", line 1 - me1=; me2=1.4131213684418644; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.065515e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.233179e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.233179e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.835810 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,036,150,788 cycles # 2.739 GHz + 11,457,434,104 instructions # 2.28 insn per cycle + 1.839969686 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.012086e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.033024e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.033024e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.647583 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,499,476,819 cycles # 2.725 GHz + 10,716,818,624 instructions # 2.38 insn per cycle + 1.651828196 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 91) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.046506e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.146814e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.146814e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.355729 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,171,753,505 cycles # 1.769 GHz + 6,006,835,350 instructions # 1.44 insn per cycle + 2.359914843 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1617) (512y: 95) (512z: 3577) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 28ea2b77d0..df418c0c55 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 
'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-17_09:03:35 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:13:51 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception - 790,067,314 cycles:u # 2.455 GHz (75.50%) - 2,353,075 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.86%) - 9,910,058 stalled-cycles-backend:u # 1.25% backend cycles idle (74.28%) - 1,334,713,958 instructions:u # 1.69 insn per cycle - # 0.01 stalled cycles per insn (75.12%) - 0.360079549 seconds time elapsed +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.819441e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.940165e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.036270e+06 ) sec^-1 
+MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.473171 sec +INFO: No Floating Point Exceptions have been reported + 1,973,905,564 cycles # 2.864 GHz + 2,835,389,936 instructions # 1.44 insn per cycle + 0.747859769 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.045923e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.239053e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.249723e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.613723 sec +INFO: No Floating Point Exceptions have been reported + 2,468,512,324 cycles # 2.879 GHz + 3,722,507,305 instructions # 1.51 insn per cycle + 0.915283019 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception - 1,997,776,639 cycles:u # 2.793 GHz (75.32%) - 2,591,438 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.42%) - 5,068,348 stalled-cycles-backend:u # 0.25% backend cycles idle (74.82%) - 2,425,917,284 instructions:u # 1.21 insn per cycle - # 0.00 stalled cycles per insn (74.31%) - 0.753671044 seconds time 
elapsed +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.416240e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.428350e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.428350e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.802073 sec +INFO: No Floating Point Exceptions have been reported + 19,919,234,926 cycles # 2.929 GHz + 60,126,857,831 instructions # 3.02 insn per cycle + 6.806341598 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1322) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.4131213684418644 - File "", line 1 - me1=; me2=1.4131213684418644; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.628115e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.671006e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.671006e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.560395 sec +INFO: No Floating Point Exceptions have been reported + 10,470,027,689 cycles # 2.938 GHz + 30,685,175,745 instructions # 2.93 insn per cycle + 3.564357324 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5047) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.858005e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.013532e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.013532e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.870006 sec +INFO: No Floating Point Exceptions have been reported + 5,129,037,452 cycles # 2.738 GHz + 11,838,972,708 instructions # 2.31 insn per cycle + 1.873874088 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 4748) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.652883e+04 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 9.834044e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.834044e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.717936 sec +INFO: No Floating Point Exceptions have been reported + 4,726,163,144 cycles # 2.747 GHz + 11,165,051,323 instructions # 2.36 insn per cycle + 1.721718897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4403) (512y: 245) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.029308e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.126499e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.126499e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.353551 sec +INFO: No Floating Point Exceptions have been reported + 4,165,348,623 cycles # 1.768 GHz + 6,220,012,480 instructions # 1.49 insn per cycle + 2.357450464 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1513) (512y: 140) (512z: 3679) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 5c62f736c5..0344b19ae4 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 
'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-17_09:03:39 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:14:17 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 795,425,366 cycles:u # 2.535 GHz (75.09%) - 2,323,507 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.76%) - 12,833,150 stalled-cycles-backend:u # 1.61% backend cycles idle (74.54%) - 1,318,876,921 instructions:u # 1.66 insn per cycle - # 0.01 stalled cycles per insn (74.18%) - 0.348861399 seconds time elapsed +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.690436e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.002102e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.037933e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 +TOTAL : 0.455498 sec +INFO: No Floating Point Exceptions have been reported + 1,928,242,310 cycles # 2.870 GHz + 2,746,045,826 instructions # 1.42 insn per cycle + 0.728814382 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.680469e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.378510e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.424836e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 +TOTAL : 0.509889 sec +INFO: No Floating Point Exceptions have been reported + 2,121,031,452 cycles # 2.862 GHz + 3,036,959,694 instructions # 1.43 insn per cycle + 0.800065199 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412607e+00 +Avg ME (F77/GPU) = 1.4132214305330990 +Relative difference = 0.0004349621183379836 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 1,357,401,223 cycles:u # 2.729 GHz (74.51%) - 2,360,040 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.82%) - 5,861,415 stalled-cycles-backend:u # 0.43% backend cycles idle (75.00%) - 1,880,028,115 instructions:u # 1.39 insn per cycle - # 0.00 stalled cycles per insn (75.21%) - 0.534139334 seconds time 
elapsed +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.504687e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.517708e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.517708e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.560247 sec +INFO: No Floating Point Exceptions have been reported + 19,257,464,373 cycles # 2.934 GHz + 59,612,594,917 instructions # 3.10 insn per cycle + 6.564375492 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.4131674300257941 - File "", line 1 - me1=; me2=1.4131674300257941; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129949096991936 +Relative difference = 6.390737857384068e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.084277e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.218488e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.218488e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 2.043801 sec +INFO: No Floating Point Exceptions have been reported + 6,009,096,977 cycles # 2.936 GHz + 17,060,655,087 instructions # 2.84 insn per cycle + 2.047534449 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129954647353316 +Relative difference = 3.2890090308261873e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.741509e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.801920e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.801920e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.958794 sec +INFO: No Floating Point Exceptions have been reported + 2,632,796,186 cycles # 2.737 GHz + 6,187,347,650 instructions # 2.35 insn per cycle + 0.962496439 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.912269e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.986419e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.986419e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.875039 sec +INFO: No Floating Point Exceptions have been reported + 2,407,469,182 cycles # 2.742 GHz + 5,790,784,602 instructions # 2.41 insn per cycle + 0.878768885 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4908) (512y: 36) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.443174e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.485583e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.485583e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.155478 sec +INFO: No Floating Point Exceptions have been reported + 2,073,615,836 cycles # 1.790 GHz + 3,391,178,624 instructions # 1.64 insn per cycle + 1.159306518 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2235) (512y: 39) (512z: 3789) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 6aff05df93..93fdf05be3 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,59 +1,167 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: 
Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-17_09:17:12 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:47:26 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 819,166,315 cycles:u # 2.563 GHz (75.32%) - 2,697,447 stalled-cycles-frontend:u # 0.33% frontend cycles idle (74.78%) - 27,322,515 stalled-cycles-backend:u # 3.34% backend cycles idle (74.60%) - 1,319,678,705 instructions:u # 1.61 insn per cycle - # 0.02 stalled cycles per insn (73.07%) - 0.352508429 seconds time elapsed +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.480682e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.545762e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.545762e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 +TOTAL : 0.469335 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,971,009,268 cycles # 2.861 GHz + 2,878,621,667 instructions # 1.46 insn per cycle + 0.747088963 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 3,382,949,380 cycles:u # 3.026 GHz (74.85%) - 16,737,570 stalled-cycles-frontend:u # 0.49% frontend cycles idle (74.97%) - 830,499,979 stalled-cycles-backend:u # 24.55% backend cycles idle (74.44%) - 3,669,566,461 instructions:u # 1.08 insn per cycle - # 0.23 stalled cycles per insn (74.74%) - 1.149279792 seconds time elapsed +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.503732e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.296845e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.296845e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.737499e+02 +- 4.776369e+02 ) GeV^-2 +TOTAL : 0.653804 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,546,264,462 cycles # 2.875 GHz + 
3,884,000,523 instructions # 1.53 insn per cycle + 0.944357505 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412607e+00 +Avg ME (F77/GPU) = 1.4132214305330990 +Relative difference = 0.0004349621183379836 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.503119e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.516211e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.516211e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.568401 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 19,288,292,034 cycles # 2.936 GHz + 59,615,397,281 instructions # 3.09 insn per cycle + 6.572330246 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129949096991936 +Relative difference = 6.390737857384068e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.074244e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.211619e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.211619e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 2.051717 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,032,564,255 cycles # 2.936 GHz + 17,108,905,426 instructions # 2.84 insn per cycle + 2.055577630 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -61,11 +169,114 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.4131674300257941 - File "", line 1 - me1=; me2=1.4131674300257941; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129954647353316 +Relative difference = 3.2890090308261873e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.735664e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.796597e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.796597e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.965943 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,652,269,080 cycles # 2.742 GHz + 6,224,274,753 instructions # 2.35 insn per cycle + 0.969710233 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.901208e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.974602e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.974602e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.883566 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,424,412,243 cycles # 2.734 GHz + 5,827,930,388 instructions # 2.40 insn per cycle + 0.887425140 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4908) (512y: 36) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.438672e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.481527e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.481527e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.163548 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,093,109,210 cycles # 1.794 GHz + 3,432,132,802 instructions # 1.64 insn per cycle + 1.167531871 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2235) (512y: 39) (512z: 3789) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index d98d10b469..04fc107fbb 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 
'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-17_09:03:41 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:14:38 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception - 804,704,967 cycles:u # 2.580 GHz (74.99%) - 2,485,868 stalled-cycles-frontend:u # 0.31% frontend cycles idle (74.03%) - 5,581,551 stalled-cycles-backend:u # 0.69% backend cycles idle (74.61%) - 1,289,552,140 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (74.72%) - 0.349270365 seconds time elapsed +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.693098e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.040838e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.074373e+07 ) sec^-1 
+MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 +TOTAL : 0.456865 sec +INFO: No Floating Point Exceptions have been reported + 1,922,600,682 cycles # 2.857 GHz + 2,735,771,538 instructions # 1.42 insn per cycle + 0.729676644 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.681790e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.366127e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.406124e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 +TOTAL : 0.509422 sec +INFO: No Floating Point Exceptions have been reported + 2,117,565,229 cycles # 2.869 GHz + 3,056,275,302 instructions # 1.44 insn per cycle + 0.796290434 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412607e+00 +Avg ME (F77/GPU) = 1.4132214305330990 +Relative difference = 0.0004349621183379836 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception - 1,338,950,680 cycles:u # 2.709 GHz (74.38%) - 2,324,316 stalled-cycles-frontend:u # 0.17% frontend cycles idle (76.38%) - 10,977,568 stalled-cycles-backend:u # 0.82% backend cycles idle (76.62%) - 1,846,343,229 instructions:u # 1.38 insn per cycle - # 0.01 stalled cycles per insn (75.41%) - 0.529564954 seconds time 
elapsed +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.491972e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.504565e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.504565e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.593721 sec +INFO: No Floating Point Exceptions have been reported + 19,401,378,848 cycles # 2.941 GHz + 59,351,233,195 instructions # 3.06 insn per cycle + 6.597810534 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.4131674300257941 - File "", line 1 - me1=; me2=1.4131674300257941; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129949096991936 +Relative difference = 6.390737857384068e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.427450e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.574205e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.574205e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 1.961524 sec +INFO: No Floating Point Exceptions have been reported + 5,763,417,063 cycles # 2.934 GHz + 16,848,552,420 instructions # 2.92 insn per cycle + 1.965663621 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5611) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129954647353316 +Relative difference = 3.2890090308261873e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.513418e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.559668e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.559668e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.100420 sec +INFO: No Floating Point Exceptions have been reported + 3,014,454,268 cycles # 2.733 GHz + 6,847,622,992 instructions # 2.27 insn per cycle + 1.104094178 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 5735) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.641246e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.695747e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.695747e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.015943 sec +INFO: No Floating Point Exceptions have been reported + 2,793,517,683 cycles # 2.742 GHz + 6,436,907,448 instructions # 2.30 insn per cycle + 1.019630864 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5509) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.322600e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.358678e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.358678e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.258576 sec +INFO: No Floating Point Exceptions have been reported + 2,248,626,373 cycles # 1.783 GHz + 3,754,168,834 instructions # 1.67 insn per cycle + 1.262333902 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2466) (512y: 29) (512z: 4084) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 67b9b653dc..2641b6a6f8 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 
'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-17_09:03:44 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:14:59 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception - 816,747,087 cycles:u # 2.541 GHz (74.32%) - 2,217,663 stalled-cycles-frontend:u # 0.27% frontend cycles idle (77.46%) - 7,439,488 stalled-cycles-backend:u # 0.91% backend cycles idle (76.82%) - 1,328,789,320 instructions:u # 1.63 insn per cycle - # 0.01 stalled cycles per insn (76.44%) - 0.361184807 seconds time elapsed +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.553984e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.813684e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.925982e+06 ) sec^-1 
+MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.473727 sec +INFO: No Floating Point Exceptions have been reported + 1,964,125,217 cycles # 2.842 GHz + 2,850,802,933 instructions # 1.45 insn per cycle + 0.747533169 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.039046e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.224514e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235497e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.623745 sec +INFO: No Floating Point Exceptions have been reported + 2,491,113,981 cycles # 2.884 GHz + 3,741,355,868 instructions # 1.50 insn per cycle + 0.924633822 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception - 2,050,860,031 cycles:u # 2.855 GHz (75.25%) - 2,539,060 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.60%) - 5,619,619 stalled-cycles-backend:u # 0.27% backend cycles idle (75.72%) - 2,421,380,911 instructions:u # 1.18 insn per cycle - # 0.00 stalled cycles per insn (74.15%) - 0.757114524 seconds time 
elapsed +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.390069e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.401784e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.401784e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.876043 sec +INFO: No Floating Point Exceptions have been reported + 20,176,006,103 cycles # 2.934 GHz + 60,944,588,650 instructions # 3.02 insn per cycle + 6.880217907 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1220) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.4131213755569483 - File "", line 1 - me1=; me2=1.4131213755569483; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213859069593 +Relative difference = 4.345647726386255e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.624106e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.667122e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.667122e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.563355 sec +INFO: No Floating Point Exceptions have been reported + 10,467,283,500 cycles # 2.935 GHz + 30,820,693,493 instructions # 2.94 insn per cycle + 3.567171047 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5351) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213792564823 +Relative difference = 4.392710025734405e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.172379e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.336577e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.336577e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.806966 sec +INFO: No Floating Point Exceptions have been reported + 4,954,879,411 cycles # 2.737 GHz + 11,359,422,816 instructions # 2.29 insn per cycle + 1.810872816 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 4776) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213600217192 +Relative difference = 4.5288254008796884e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.036379e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.057513e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.057513e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.601666 sec +INFO: No Floating Point Exceptions have been reported + 4,380,983,099 cycles # 2.729 GHz + 10,610,165,712 instructions # 2.42 insn per cycle + 1.605990710 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4503) (512y: 83) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213600217192 +Relative difference = 4.5288254008796884e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.900446e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.995461e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.995461e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.397174 sec +INFO: No Floating Point Exceptions have been reported + 4,245,323,919 cycles # 1.769 GHz + 6,166,210,089 instructions # 1.45 insn per cycle + 2.401100901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2140) (512y: 117) (512z: 3653) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213786174055 +Relative difference = 4.3972324717191576e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 31382f9560..0766319c3b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 
'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-17_09:03:48 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:15:25 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception - 805,702,716 cycles:u # 2.517 GHz (75.98%) - 2,417,935 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.25%) - 11,480,390 stalled-cycles-backend:u # 1.42% backend cycles idle (74.63%) - 1,356,931,070 instructions:u # 1.68 insn per cycle - # 0.01 stalled cycles per insn (74.03%) - 0.357191421 seconds time elapsed +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.556212e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.906743e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.026143e+06 ) sec^-1 
+MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.472306 sec +INFO: No Floating Point Exceptions have been reported + 1,982,520,983 cycles # 2.874 GHz + 2,863,074,866 instructions # 1.44 insn per cycle + 0.745845869 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.042838e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.231665e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.242155e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.611235 sec +INFO: No Floating Point Exceptions have been reported + 2,455,829,243 cycles # 2.879 GHz + 3,741,729,771 instructions # 1.52 insn per cycle + 0.912428146 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.413122e+00 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception - 2,030,800,576 cycles:u # 2.836 GHz (75.61%) - 2,533,232 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.12%) - 10,284,240 stalled-cycles-backend:u # 0.51% backend cycles idle (74.94%) - 2,396,848,084 instructions:u # 1.18 insn per cycle - # 0.00 stalled cycles per insn (74.52%) - 0.752414920 seconds time 
elapsed +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 2.386940e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.398394e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.398394e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.884140 sec +INFO: No Floating Point Exceptions have been reported + 20,272,568,697 cycles # 2.944 GHz + 61,168,730,148 instructions # 3.02 insn per cycle + 6.888274413 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1272) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 1.4131213755569483 - File "", line 1 - me1=; me2=1.4131213755569483; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213859069593 +Relative difference = 4.345647726386255e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.669440e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.713215e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.713215e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.529000 sec +INFO: No Floating Point Exceptions have been reported + 10,335,535,502 cycles # 2.926 GHz + 30,533,410,675 instructions # 2.95 insn per cycle + 3.532867905 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5155) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213792564823 +Relative difference = 4.392710025734405e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.803371e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.957146e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.957146e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.880842 sec +INFO: No Floating Point Exceptions have been reported + 5,141,108,977 cycles # 2.729 GHz + 11,871,626,607 instructions # 2.31 insn per cycle + 1.885060685 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 4887) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213600217192 +Relative difference = 4.5288254008796884e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.734351e+04 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 9.920231e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.920231e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.703697 sec +INFO: No Floating Point Exceptions have been reported + 4,677,605,202 cycles # 2.740 GHz + 11,166,557,237 instructions # 2.39 insn per cycle + 1.707597039 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4508) (512y: 238) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213600217192 +Relative difference = 4.5288254008796884e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.863155e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.956338e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.956338e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.409053 sec +INFO: No Floating Point Exceptions have been reported + 4,255,960,621 cycles # 1.764 GHz + 6,404,237,522 instructions # 1.50 insn per cycle + 2.413297760 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2036) (512y: 163) (512z: 3731) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213786174055 +Relative difference = 4.3972324717191576e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index aaa3cb9985..5f3726dcea 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done 
for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:03:51 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:15:51 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 1,672,827,040 cycles:u # 1.274 GHz (74.29%) - 2,536,395 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.97%) - 5,501,869 stalled-cycles-backend:u # 0.33% backend cycles idle (75.35%) - 2,005,314,113 instructions:u # 1.20 insn per cycle - # 0.00 stalled cycles per insn (75.22%) - 1.370037399 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.308012e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.334511e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.336215e+05 
) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.533591 sec +INFO: No Floating Point Exceptions have been reported + 2,205,805,938 cycles # 2.869 GHz + 3,444,884,387 instructions # 1.56 insn per cycle + 0.825307966 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.131073e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.161174e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.162405e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.048991 sec +INFO: No Floating Point Exceptions have been reported + 9,688,028,273 cycles # 2.924 GHz + 22,036,541,373 instructions # 2.27 insn per cycle + 3.369850770 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 22,771,879,189 cycles:u # 3.429 GHz (74.99%) - 3,632,595 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.98%) - 7,645,494 stalled-cycles-backend:u # 0.03% backend cycles idle (75.00%) - 20,219,629,853 instructions:u # 0.89 insn per cycle - # 0.00 stalled 
cycles per insn (75.06%) - 6.681399663 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.879946e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.880868e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.880868e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.730075 sec +INFO: No Floating Point Exceptions have been reported + 25,643,153,835 cycles # 2.937 GHz + 78,954,437,611 instructions # 3.08 insn per cycle + 8.734432118 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6266731198158101E-004 - File "", line 1 - me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198141133E-004 +Relative difference = 2.8372990776517314e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.520374e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.523613e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.523613e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.665332 sec +INFO: No Floating Point Exceptions have been reported + 13,099,128,105 cycles # 2.806 GHz + 39,559,591,481 instructions # 3.02 insn per cycle + 4.669271517 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198141122E-004 +Relative difference = 2.837299079287849e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.059011e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.075081e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.075081e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.042413 sec +INFO: No Floating Point Exceptions have been reported + 5,610,747,752 cycles # 2.743 GHz + 13,824,504,616 instructions # 2.46 insn per cycle + 2.046398223 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
+EvtsPerSec[Rmb+ME] (23) = ( 9.162703e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.184308e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.184308e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.796726 sec +INFO: No Floating Point Exceptions have been reported + 4,922,237,700 cycles # 2.735 GHz + 12,506,994,545 instructions # 2.54 insn per cycle + 1.800589813 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.982168e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.994583e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.994583e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.355452 sec +INFO: No Floating Point Exceptions have been reported + 4,140,123,386 cycles # 1.756 GHz + 6,390,153,387 instructions # 1.54 insn per cycle + 2.359734916 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 49fa73b6a5..1d93db579b 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,59 +1,167 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:17:29 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:48:14 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 1,681,929,144 cycles:u # 2.927 GHz (75.10%) - 3,388,305 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.26%) - 47,046,516 stalled-cycles-backend:u # 2.80% backend cycles idle (75.35%) - 2,098,819,459 instructions:u # 1.25 insn per cycle - # 0.02 stalled cycles per insn (74.67%) - 0.608519815 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.969430e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.268357e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.268357e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.523555 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,166,841,175 cycles # 2.867 GHz + 3,453,451,458 instructions # 1.59 insn per cycle + 0.814918597 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 25,532,544,588 cycles:u # 3.408 GHz (75.00%) - 39,137,847 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.02%) - 1,119,010,439 stalled-cycles-backend:u # 4.38% backend cycles idle (75.00%) - 22,646,170,640 instructions:u # 0.89 insn per cycle - # 0.05 stalled cycles per insn (75.03%) - 7.526076810 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.613032e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.091578e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.091578e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.317499 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,422,723,136 cycles # 2.898 
GHz + 15,879,167,379 instructions # 1.52 insn per cycle + 3.658545225 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.878765e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.879684e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.879684e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.741583 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 25,666,397,830 cycles # 2.935 GHz + 78,965,262,045 instructions # 3.08 insn per cycle + 8.745862119 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198141133E-004 +Relative difference = 2.8372990776517314e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.560977e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.564368e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.564368e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.617447 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 13,088,026,122 cycles # 2.833 GHz + 39,572,731,788 instructions # 3.02 insn per cycle + 4.621932955 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -61,11 +169,114 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6266731198158101E-004 - File "", line 1 - me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198141122E-004 +Relative difference = 2.837299079287849e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.016507e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.032941e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.032941e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.058059 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,631,279,447 cycles # 2.732 GHz + 13,836,775,240 instructions # 2.46 insn per cycle + 2.062638485 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.172752e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.195878e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.195878e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.800276 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,940,734,767 cycles # 2.740 GHz + 12,518,660,568 instructions # 2.53 insn per cycle + 1.804734715 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.912888e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.925297e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.925297e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.384873 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,161,817,801 cycles # 1.743 GHz + 6,405,054,448 instructions # 1.54 insn per cycle + 2.389410885 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 9f61e25745..fc2e4b7aa0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:19:45 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:59:16 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 1,706,363,197 cycles:u # 3.011 GHz (74.82%) - 3,277,719 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.88%) - 37,627,569 stalled-cycles-backend:u # 2.21% backend cycles idle (74.60%) - 2,063,772,296 instructions:u # 1.21 insn per cycle - # 0.02 stalled cycles per insn (74.16%) - 0.597226347 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.322702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.346002e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 3.347615e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.517480 sec +INFO: No Floating Point Exceptions have been reported + 2,154,192,085 cycles # 2.875 GHz + 3,384,532,263 instructions # 1.57 insn per cycle + 0.808566781 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.137866e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.167359e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168584e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 +TOTAL : 3.158061 sec +INFO: No Floating Point Exceptions have been reported + 9,917,250,541 cycles # 2.905 GHz + 22,199,780,027 instructions # 2.24 insn per cycle + 3.469925481 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 25,046,951,153 cycles:u # 3.416 GHz (75.01%) - 28,734,977 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.01%) - 1,094,781,598 stalled-cycles-backend:u # 4.37% backend cycles idle (75.02%) - 21,796,222,378 
instructions:u # 0.87 insn per cycle - # 0.05 stalled cycles per insn (74.96%) - 7.363577246 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.876744e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.877706e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.877706e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 8.747062 sec +INFO: No Floating Point Exceptions have been reported + 25,648,612,426 cycles # 2.931 GHz + 78,952,780,288 instructions # 3.08 insn per cycle + 8.750949596 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6266731198158101E-004 - File "", line 1 - me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198141133E-004 +Relative difference = 2.8372990776517314e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.516490e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.519795e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.519795e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 4.672214 sec +INFO: No Floating Point Exceptions have been reported + 13,064,181,413 cycles # 2.795 GHz + 39,557,975,845 instructions # 3.03 insn per cycle + 4.676162125 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198141122E-004 +Relative difference = 2.837299079287849e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.035027e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.051957e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.051957e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.050050 sec +INFO: No Floating Point Exceptions have been reported + 5,620,854,320 cycles # 2.738 GHz + 13,824,518,317 instructions # 2.46 insn per cycle + 2.054209584 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.147098e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.168017e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.168017e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.801905 sec +INFO: No Floating Point Exceptions have been reported + 4,929,552,945 cycles # 2.731 GHz + 12,503,971,607 instructions # 2.54 insn per cycle + 1.805808510 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.940588e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.953237e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.953237e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.372033 sec +INFO: No Floating Point Exceptions have been reported + 4,149,036,914 cycles # 1.747 GHz + 6,390,952,192 instructions # 1.54 insn per cycle + 2.375889237 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 5c6b4d7393..794fb1a802 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,53 +1,120 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:19:11 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:53:35 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 1,706,371,987 cycles:u # 2.992 GHz (75.54%) - 3,631,847 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.97%) - 38,649,177 stalled-cycles-backend:u # 2.26% backend cycles idle (74.77%) - 2,082,455,921 instructions:u # 1.22 insn per cycle - # 0.02 stalled cycles per insn (73.61%) - 0.612529378 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.043181e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.332865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.334802e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.523228 sec +INFO: No Floating Point Exceptions have been reported + 2,169,607,107 cycles # 2.877 GHz + 3,459,237,306 instructions # 1.59 insn per cycle + 0.815202021 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.724697e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.162122e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.163349e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.220306 sec +INFO: No Floating Point Exceptions have been reported + 10,133,189,238 cycles # 2.914 GHz + 23,148,993,968 instructions # 2.28 insn per cycle + 3.534065636 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 25,402,369,575 cycles:u # 3.420 GHz (75.04%) - 39,113,205 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.02%) - 1,097,225,273 stalled-cycles-backend:u # 4.32% backend cycles idle (74.96%) - 22,608,680,644 instructions:u # 0.89 insn per cycle - # 0.05 stalled cycles per insn (74.95%) - 7.452100580 seconds time 
elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.879192e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.880098e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.880098e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.734016 sec +INFO: No Floating Point Exceptions have been reported + 25,661,507,280 cycles # 2.937 GHz + 78,953,590,713 instructions # 3.08 insn per cycle + 8.737772518 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -55,11 +122,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6266731198158101E-004 - File "", line 1 - me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198141133E-004 +Relative difference = 2.8372990776517314e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.527947e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.531121e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.531121e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.656259 sec +INFO: No Floating Point Exceptions have been reported + 13,056,695,540 cycles # 2.803 GHz + 39,560,471,761 instructions # 3.03 insn per cycle + 4.660116201 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198141122E-004 +Relative difference = 2.837299079287849e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.991321e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.007389e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.007389e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.059547 sec +INFO: No Floating Point Exceptions have been reported + 5,612,692,411 cycles # 2.721 GHz + 13,825,461,651 instructions # 2.46 insn per cycle + 2.063599543 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.168393e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.189788e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.189788e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.795888 sec +INFO: No Floating Point Exceptions have been reported + 4,923,088,043 cycles # 2.737 GHz + 12,506,721,234 instructions # 2.54 insn per cycle + 1.799844991 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.949785e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.962226e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.962226e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.367179 sec +INFO: No Floating Point Exceptions have been reported + 4,144,948,568 cycles # 1.750 GHz + 6,391,796,529 instructions # 1.54 insn per cycle + 2.371216811 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 8581774aa9..80da0089a3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:04:02 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:16:25 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception - 1,563,080,932 cycles:u # 2.897 GHz (75.08%) - 2,335,713 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.45%) - 5,785,858 stalled-cycles-backend:u # 0.37% backend cycles idle (74.46%) - 2,027,837,198 instructions:u # 1.30 insn per cycle - # 0.00 stalled cycles per insn (74.18%) - 0.577265916 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.297695e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.326014e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328135e+05 
) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.536243 sec +INFO: No Floating Point Exceptions have been reported + 2,198,395,650 cycles # 2.855 GHz + 3,395,768,128 instructions # 1.54 insn per cycle + 0.828484590 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.139738e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.170223e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.171507e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.043962 sec +INFO: No Floating Point Exceptions have been reported + 9,646,114,898 cycles # 2.920 GHz + 22,170,499,370 instructions # 2.30 insn per cycle + 3.361578134 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception - 22,165,132,675 cycles:u # 3.427 GHz (75.02%) - 3,483,789 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.02%) - 7,652,025 stalled-cycles-backend:u # 0.03% backend cycles idle (75.01%) - 19,788,795,973 instructions:u # 0.89 insn per cycle - # 0.00 stalled 
cycles per insn (74.99%) - 6.507227844 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.884927e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.885834e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.885834e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.707889 sec +INFO: No Floating Point Exceptions have been reported + 25,619,332,595 cycles # 2.941 GHz + 78,702,929,908 instructions # 3.07 insn per cycle + 8.712420077 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4191) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6266731198158101E-004 - File "", line 1 - me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198141133E-004 +Relative difference = 2.8372990776517314e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.574307e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.577560e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.577560e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.595188 sec +INFO: No Floating Point Exceptions have been reported + 13,048,399,086 cycles # 2.838 GHz + 39,450,691,251 instructions # 3.02 insn per cycle + 4.599210719 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12966) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198141122E-004 +Relative difference = 2.837299079287849e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.930739e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.946422e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.946422e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.074898 sec +INFO: No Floating Point Exceptions have been reported + 5,675,338,380 cycles # 2.732 GHz + 13,910,840,784 instructions # 2.45 insn per cycle + 2.079006346 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:11582) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
+EvtsPerSec[Rmb+ME] (23) = ( 9.062486e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.083766e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.083766e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.816561 sec +INFO: No Floating Point Exceptions have been reported + 4,996,440,015 cycles # 2.746 GHz + 12,603,390,155 instructions # 2.52 insn per cycle + 1.820566072 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10423) (512y: 240) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.965367e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.977715e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.977715e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.361480 sec +INFO: No Floating Point Exceptions have been reported + 4,159,091,159 cycles # 1.759 GHz + 6,499,576,244 instructions # 1.56 insn per cycle + 2.365402468 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1751) (512y: 194) (512z: 9382) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 2d3a62200d..a149b91e1f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:11:55 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:36:57 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe: Floating point exception - 1,661,905,073 cycles:u # 2.966 GHz (74.96%) - 2,555,694 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.68%) - 6,486,605 stalled-cycles-backend:u # 0.39% backend cycles idle (75.73%) - 2,055,254,079 instructions:u # 1.24 insn per cycle - # 0.00 stalled cycles per insn (75.84%) - 0.598193571 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.106076e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.131267e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.133080e+05 
) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.544695 sec +INFO: No Floating Point Exceptions have been reported + 2,219,428,600 cycles # 2.858 GHz + 3,493,527,234 instructions # 1.57 insn per cycle + 0.834499500 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.753348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.778591e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.779655e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.312449 sec +INFO: No Floating Point Exceptions have been reported + 10,421,899,087 cycles # 2.913 GHz + 24,058,421,553 instructions # 2.31 insn per cycle + 3.636091022 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe: Floating point exception - 22,752,766,839 cycles:u # 3.426 GHz (74.95%) - 3,567,249 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.00%) - 6,939,581 stalled-cycles-backend:u # 0.03% backend cycles idle (75.07%) - 20,271,008,331 instructions:u # 0.89 insn per cycle - # 0.00 stalled 
cycles per insn (75.04%) - 6.678436209 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 4.268976e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.269450e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.269450e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 38.423779 sec +INFO: No Floating Point Exceptions have been reported + 112,730,268,623 cycles # 2.934 GHz + 144,772,135,406 instructions # 1.28 insn per cycle + 38.427951659 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21273) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6266731198158101E-004 - File "", line 1 - me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198140461E-004 +Relative difference = 2.8372991790910424e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] 
[hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.077363e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.079742e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.079742e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.335511 sec +INFO: No Floating Point Exceptions have been reported + 14,752,370,812 cycles # 2.763 GHz + 37,645,694,563 instructions # 2.55 insn per cycle + 5.339828429 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68253) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198141209E-004 +Relative difference = 2.8372990661989057e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.332306e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.345602e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.345602e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.243210 sec +INFO: No Floating Point Exceptions have been reported + 6,130,370,628 cycles # 2.729 GHz + 13,060,931,234 instructions # 2.13 insn per cycle + 2.247428561 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:46973) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
+EvtsPerSec[Rmb+ME] (23) = ( 8.812729e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.832105e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.832105e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.867280 sec +INFO: No Floating Point Exceptions have been reported + 5,063,580,201 cycles # 2.707 GHz + 11,453,397,200 instructions # 2.26 insn per cycle + 1.871531437 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40498) (512y: 285) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.266726e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.280024e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.280024e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.263638 sec +INFO: No Floating Point Exceptions have been reported + 3,957,788,966 cycles # 1.746 GHz + 5,926,468,977 instructions # 1.50 insn per cycle + 2.267826067 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2444) (512y: 337) (512z:39349) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 0c0eab7b00..c0add05aa1 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:12:05 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:38:07 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe: Floating point exception - 1,585,284,024 cycles:u # 2.916 GHz (75.24%) - 2,383,798 stalled-cycles-frontend:u # 0.15% frontend cycles idle (73.60%) - 5,588,685 stalled-cycles-backend:u # 0.35% backend cycles idle (74.30%) - 2,036,305,234 instructions:u # 1.28 insn per cycle - # 0.00 stalled cycles per insn (75.02%) - 0.580606077 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.096121e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.123086e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.125058e+05 
) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.541972 sec +INFO: No Floating Point Exceptions have been reported + 2,219,963,696 cycles # 2.870 GHz + 3,470,909,979 instructions # 1.56 insn per cycle + 0.830712751 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.756387e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.782287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.783295e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.298693 sec +INFO: No Floating Point Exceptions have been reported + 10,399,316,447 cycles # 2.922 GHz + 23,584,057,660 instructions # 2.27 insn per cycle + 3.614068267 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe: Floating point exception - 22,138,389,204 cycles:u # 3.424 GHz (75.07%) - 3,668,578 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.13%) - 8,480,796 stalled-cycles-backend:u # 0.04% backend cycles idle (75.08%) - 19,767,092,627 instructions:u # 0.89 insn per cycle - # 0.00 stalled 
cycles per insn (74.88%) - 6.501829671 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 4.224460e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.224899e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.224899e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 38.827526 sec +INFO: No Floating Point Exceptions have been reported + 113,783,414,735 cycles # 2.930 GHz + 144,278,309,276 instructions # 1.27 insn per cycle + 38.831628591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21024) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6266731198158101E-004 - File "", line 1 - me1=; me2=6.6266731198158101E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198140450E-004 +Relative difference = 2.83729918072716e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] 
[hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.989108e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.991357e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.991357e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.492626 sec +INFO: No Floating Point Exceptions have been reported + 15,275,599,565 cycles # 2.780 GHz + 38,389,599,156 instructions # 2.51 insn per cycle + 5.496788286 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69643) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198141209E-004 +Relative difference = 2.8372990661989057e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.497881e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.512338e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.512338e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.193501 sec +INFO: No Floating Point Exceptions have been reported + 6,019,122,923 cycles # 2.740 GHz + 12,933,620,431 instructions # 2.15 insn per cycle + 2.197765722 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:46099) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
+EvtsPerSec[Rmb+ME] (23) = ( 8.839318e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.859960e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.859960e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.861821 sec +INFO: No Floating Point Exceptions have been reported + 5,093,783,286 cycles # 2.731 GHz + 11,449,481,812 instructions # 2.25 insn per cycle + 1.866150033 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40142) (512y: 219) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.279822e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.293417e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.293417e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.259516 sec +INFO: No Floating Point Exceptions have been reported + 3,958,337,222 cycles # 1.750 GHz + 5,889,113,860 instructions # 1.49 insn per cycle + 2.263750575 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1959) (512y: 259) (512z:38927) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 751330cae0..f53bdfcb06 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:04:11 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:16:59 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 1,418,805,040 cycles:u # 2.875 GHz (75.49%) - 2,313,274 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.64%) - 6,853,709 stalled-cycles-backend:u # 0.48% backend cycles idle (74.21%) - 1,875,134,710 instructions:u # 1.32 insn per cycle - # 0.00 stalled cycles per insn (74.32%) - 0.530722972 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.467249e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.509285e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.513718e+05 
) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.494218 sec +INFO: No Floating Point Exceptions have been reported + 2,034,103,432 cycles # 2.860 GHz + 3,045,186,386 instructions # 1.50 insn per cycle + 0.768364063 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.128844e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.190571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.193248e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.796967 sec +INFO: No Floating Point Exceptions have been reported + 5,948,917,067 cycles # 2.924 GHz + 12,254,957,631 instructions # 2.06 insn per cycle + 2.089755272 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 13,382,958,070 cycles:u # 3.382 GHz (75.13%) - 4,088,491 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.06%) - 6,955,897 stalled-cycles-backend:u # 0.05% backend cycles idle (74.90%) - 12,294,040,023 instructions:u # 0.92 insn per cycle - # 0.00 stalled cycles per 
insn (74.84%) - 3.993439825 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.939424e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.940380e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.940380e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.461205 sec +INFO: No Floating Point Exceptions have been reported + 24,939,277,475 cycles # 2.947 GHz + 79,109,068,255 instructions # 3.17 insn per cycle + 8.465315543 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6271025600481842E-004 - File "", line 1 - me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863312764526E-004 +Relative difference = 4.998523613136231e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.989306e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.001573e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.001573e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.351266 sec +INFO: No Floating Point Exceptions have been reported + 6,525,064,847 cycles # 2.771 GHz + 20,269,487,959 instructions # 3.11 insn per cycle + 2.355049106 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861442972011E-004 +Relative difference = 2.1772539563413118e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.582613e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.589051e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.589051e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.041194 sec +INFO: No Floating Point Exceptions have been reported + 2,848,829,047 cycles # 2.729 GHz + 7,065,493,216 instructions # 2.48 insn per cycle + 1.044894531 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
+EvtsPerSec[Rmb+ME] (23) = ( 1.794003e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.802231e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.802231e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.919231 sec +INFO: No Floating Point Exceptions have been reported + 2,522,001,135 cycles # 2.735 GHz + 6,403,495,458 instructions # 2.54 insn per cycle + 0.923373159 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.403418e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.408437e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.408437e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.173408 sec +INFO: No Floating Point Exceptions have been reported + 2,065,585,282 cycles # 1.756 GHz + 3,303,212,083 instructions # 1.60 insn per cycle + 1.177101647 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779718007E-004 +Relative difference = 4.194411063934945e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 1eefdd2ba8..99ccf0b7c6 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,59 +1,167 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:17:40 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:48:48 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 1,499,735,570 cycles:u # 2.874 GHz (75.92%) - 2,905,221 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.49%) - 71,774,380 stalled-cycles-backend:u # 4.79% backend cycles idle (75.06%) - 1,842,322,240 instructions:u # 1.23 insn per cycle - # 0.04 stalled cycles per insn (74.87%) - 0.553912247 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.945945e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.468849e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.468849e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.482042 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,997,062,353 cycles # 2.864 GHz + 3,031,546,242 instructions # 1.52 insn per cycle + 0.755384112 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 18,087,443,953 cycles:u # 3.395 GHz (74.95%) - 30,366,376 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.02%) - 2,148,114,724 stalled-cycles-backend:u # 11.88% backend cycles idle (75.08%) - 14,456,886,209 instructions:u # 0.80 insn per cycle - # 0.15 stalled cycles per insn (75.03%) - 5.362091783 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.954589e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.017029e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.017029e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 +TOTAL : 2.150287 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,911,046,669 cycles # 2.894 
GHz + 9,720,301,924 instructions # 1.41 insn per cycle + 2.447262326 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.933142e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934088e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934088e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.491300 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 24,914,436,852 cycles # 2.933 GHz + 79,112,976,787 instructions # 3.18 insn per cycle + 8.495346137 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863312764526E-004 +Relative difference = 4.998523613136231e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.985457e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.998623e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.998623e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.355396 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,536,388,186 cycles # 2.771 GHz + 20,278,657,318 instructions # 3.10 insn per cycle + 2.359340287 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -61,11 +169,114 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6271025600481842E-004 - File "", line 1 - me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861442972011E-004 +Relative difference = 2.1772539563413118e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.588469e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.595113e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.595113e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.040170 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,854,653,705 cycles # 2.736 GHz + 7,075,192,119 instructions # 2.48 insn per cycle + 1.044116961 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.764583e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.772721e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.772721e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.937436 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,529,004,808 cycles # 2.688 GHz + 6,413,196,189 instructions # 2.54 insn per cycle + 0.941494819 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.393518e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.398724e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.398724e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.184860 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,077,752,907 cycles # 1.749 GHz + 3,313,647,639 instructions # 1.59 insn per cycle + 1.188846310 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779718007E-004 +Relative difference = 4.194411063934945e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 49d6289b93..19f64c3e7a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:19:55 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:59:51 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 1,483,123,567 cycles:u # 2.868 GHz (74.57%) - 2,717,393 stalled-cycles-frontend:u # 0.18% frontend cycles idle (73.89%) - 71,784,445 stalled-cycles-backend:u # 4.84% backend cycles idle (74.13%) - 1,843,882,297 instructions:u # 1.24 insn per cycle - # 0.04 stalled cycles per insn (74.27%) - 0.564320838 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.517186e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.553165e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 5.556664e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.159396e-01 +- 3.238803e-01 ) GeV^-4 +TOTAL : 0.477606 sec +INFO: No Floating Point Exceptions have been reported + 1,989,569,741 cycles # 2.873 GHz + 3,005,042,417 instructions # 1.51 insn per cycle + 0.749585148 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.132207e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.190283e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.192869e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 +TOTAL : 1.892599 sec +INFO: No Floating Point Exceptions have been reported + 6,180,255,032 cycles # 2.913 GHz + 13,158,154,431 instructions # 2.13 insn per cycle + 2.179693271 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 17,890,534,241 cycles:u # 3.412 GHz (74.94%) - 20,006,662 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.93%) - 2,147,565,042 stalled-cycles-backend:u # 12.00% backend cycles idle (74.98%) - 13,812,088,209 
instructions:u # 0.77 insn per cycle - # 0.16 stalled cycles per insn (74.97%) - 5.275581467 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.919400e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.920348e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.920348e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 8.550896 sec +INFO: No Floating Point Exceptions have been reported + 24,917,761,266 cycles # 2.921 GHz + 79,107,928,249 instructions # 3.17 insn per cycle + 8.554412617 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6271025600481842E-004 - File "", line 1 - me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863312764526E-004 +Relative difference = 4.998523613136231e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.947395e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.959971e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.959971e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 +TOTAL : 2.367170 sec +INFO: No Floating Point Exceptions have been reported + 6,537,639,637 cycles # 2.759 GHz + 20,270,199,231 instructions # 3.10 insn per cycle + 2.370801699 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861442972011E-004 +Relative difference = 2.1772539563413118e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.589719e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.596352e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.596352e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 1.038242 sec +INFO: No Floating Point Exceptions have been reported + 2,854,773,942 cycles # 2.742 GHz + 7,065,309,093 instructions # 2.47 insn per cycle + 1.041774606 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.793297e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.801693e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.801693e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.921472 sec +INFO: No Floating Point Exceptions have been reported + 2,523,779,273 cycles # 2.730 GHz + 6,401,399,707 instructions # 2.54 insn per cycle + 0.925110369 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.398357e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.403401e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.403401e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 1.179617 sec +INFO: No Floating Point Exceptions have been reported + 2,071,965,297 cycles # 1.751 GHz + 3,301,502,867 instructions # 1.59 insn per cycle + 1.184374263 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779718007E-004 +Relative difference = 4.194411063934945e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 58b75480e2..dd6ac10521 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,53 +1,120 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:19:22 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:54:10 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 1,494,375,955 cycles:u # 2.883 GHz (75.37%) - 2,853,013 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.32%) - 71,723,714 stalled-cycles-backend:u # 4.80% backend cycles idle (75.23%) - 1,837,402,005 instructions:u # 1.23 insn per cycle - # 0.04 stalled cycles per insn (74.64%) - 0.544142086 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.065242e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.543001e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.546426e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.481262 sec +INFO: No Floating Point Exceptions have been reported + 1,996,897,335 cycles # 2.872 GHz + 2,970,634,149 instructions # 1.49 insn per cycle + 0.754034663 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= -WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.141287e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.200996e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.203446e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 +TOTAL : 1.902964 sec +INFO: No Floating Point Exceptions have been reported + 6,190,673,302 cycles # 2.901 GHz + 13,306,269,630 instructions # 2.15 insn per cycle + 2.189368892 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 18,159,840,783 cycles:u # 3.407 GHz (75.00%) - 30,335,851 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.09%) - 2,143,390,625 stalled-cycles-backend:u # 11.80% backend cycles idle (74.95%) - 14,514,491,057 instructions:u # 0.80 insn per cycle - # 0.15 stalled cycles per insn (74.85%) - 5.356682373 seconds time 
elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.933487e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934407e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934407e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.487231 sec +INFO: No Floating Point Exceptions have been reported + 24,900,546,223 cycles # 2.933 GHz + 79,107,234,987 instructions # 3.18 insn per cycle + 8.490956598 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -55,11 +122,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6271025600481842E-004 - File "", line 1 - me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863312764526E-004 +Relative difference = 4.998523613136231e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.983704e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.996217e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.996217e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.353085 sec +INFO: No Floating Point Exceptions have been reported + 6,541,995,614 cycles # 2.777 GHz + 20,269,407,860 instructions # 3.10 insn per cycle + 2.356873297 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861442972011E-004 +Relative difference = 2.1772539563413118e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.585450e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.591953e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.591953e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.039372 sec +INFO: No Floating Point Exceptions have been reported + 2,850,375,088 cycles # 2.735 GHz + 7,065,899,998 instructions # 2.48 insn per cycle + 1.043028953 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.792341e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.800787e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.800787e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.920003 sec +INFO: No Floating Point Exceptions have been reported + 2,517,551,147 cycles # 2.728 GHz + 6,403,207,803 instructions # 2.54 insn per cycle + 0.923687532 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.402198e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.407285e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.407285e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.174725 sec +INFO: No Floating Point Exceptions have been reported + 2,068,104,176 cycles # 1.756 GHz + 3,303,725,822 instructions # 1.60 insn per cycle + 1.178407380 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779718007E-004 +Relative difference = 4.194411063934945e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index c40d39c4c8..0807d31ee5 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:04:18 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:17:26 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception - 1,363,608,148 cycles:u # 2.811 GHz (74.70%) - 2,330,167 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.97%) - 8,399,284 stalled-cycles-backend:u # 0.62% backend cycles idle (75.57%) - 1,862,082,973 instructions:u # 1.37 insn per cycle - # 0.00 stalled cycles per insn (75.68%) - 0.521364052 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.482391e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.527725e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.532031e+05 
) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.493760 sec +INFO: No Floating Point Exceptions have been reported + 2,047,228,800 cycles # 2.877 GHz + 3,039,242,832 instructions # 1.48 insn per cycle + 0.768472979 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.102852e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.164567e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.167207e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.802499 sec +INFO: No Floating Point Exceptions have been reported + 5,908,213,594 cycles # 2.909 GHz + 12,456,477,911 instructions # 2.11 insn per cycle + 2.093284072 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception - 13,402,881,221 cycles:u # 3.400 GHz (75.04%) - 3,099,577 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.04%) - 6,402,571 stalled-cycles-backend:u # 0.05% backend cycles idle (75.14%) - 12,222,937,447 instructions:u # 0.91 insn per cycle - # 0.00 stalled cycles per 
insn (75.23%) - 3.977923159 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.936218e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.937180e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.937180e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.475402 sec +INFO: No Floating Point Exceptions have been reported + 24,949,332,764 cycles # 2.943 GHz + 78,839,555,653 instructions # 3.16 insn per cycle + 8.479529977 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6271025600481842E-004 - File "", line 1 - me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274866250177339E-004 +Relative difference = 5.65798569465384e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.122699e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.135567e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.135567e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.306947 sec +INFO: No Floating Point Exceptions have been reported + 6,466,639,499 cycles # 2.800 GHz + 20,230,851,658 instructions # 3.13 insn per cycle + 2.310638118 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13491) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861448331612E-004 +Relative difference = 2.1853408865157068e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.507818e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.513887e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.513887e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.092466 sec +INFO: No Floating Point Exceptions have been reported + 2,980,915,950 cycles # 2.722 GHz + 7,206,628,057 instructions # 2.42 insn per cycle + 1.096222389 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:12437) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271939668088170E-004 +Relative difference = 5.008331292535666e-09 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] 
(23) = ( 1.724603e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.732183e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.732183e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.955874 sec +INFO: No Floating Point Exceptions have been reported + 2,613,667,112 cycles # 2.726 GHz + 6,544,516,026 instructions # 2.50 insn per cycle + 0.959652526 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11449) (512y: 26) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271939668088170E-004 +Relative difference = 5.008331292535666e-09 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 
1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.352025e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.356715e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.356715e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.218129 sec +INFO: No Floating Point Exceptions have been reported + 2,137,040,914 cycles # 1.750 GHz + 3,460,849,319 instructions # 1.62 insn per cycle + 1.221974093 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3037) (512y: 25) (512z: 9677) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952032316561E-004 +Relative difference = 3.066631594207157e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 9be7bf54b0..507a64eed8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:12:14 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:39:16 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe: Floating point exception - 1,375,130,992 cycles:u # 2.798 GHz (75.90%) - 2,261,311 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.61%) - 11,313,852 stalled-cycles-backend:u # 0.82% backend cycles idle (75.57%) - 1,856,959,657 instructions:u # 1.35 insn per cycle - # 0.01 stalled cycles per insn (75.25%) - 0.529727587 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.567838e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.605874e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.609566e+05 
) sec^-1 +MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.496412 sec +INFO: No Floating Point Exceptions have been reported + 2,051,587,227 cycles # 2.873 GHz + 3,025,794,403 instructions # 1.47 insn per cycle + 0.774558823 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.651284e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.721094e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.724249e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.731680 sec +INFO: No Floating Point Exceptions have been reported + 5,770,677,421 cycles # 2.916 GHz + 12,010,283,008 instructions # 2.08 insn per cycle + 2.035197700 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262669162351490E-004 +Relative difference = 2.8232862531213374e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe: Floating point exception - 13,458,291,676 cycles:u # 3.396 GHz (75.04%) - 3,048,501 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.07%) - 7,478,777 stalled-cycles-backend:u # 0.06% backend cycles idle (75.03%) - 12,292,736,463 instructions:u # 0.91 insn per cycle - # 0.00 stalled cycles per 
insn (74.94%) - 3.997543610 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 5.459828e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.460600e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.460600e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 30.042280 sec +INFO: No Floating Point Exceptions have been reported + 86,122,252,676 cycles # 2.867 GHz + 135,657,307,138 instructions # 1.58 insn per cycle + 30.046456599 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6271025600481842E-004 - File "", line 1 - me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627535e-04 +Avg ME (F77/C++) = 6.6275349717465765E-004 +Relative difference = 4.26303654465793e-09 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] 
[hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.672428e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.686393e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.686393e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.462988 sec +INFO: No Floating Point Exceptions have been reported + 6,758,193,786 cycles # 2.742 GHz + 19,357,772,182 instructions # 2.86 insn per cycle + 2.467248153 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69591) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274862748188362E-004 +Relative difference = 4.14665283800746e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.362305e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.367046e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.367046e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.208685 sec +INFO: No Floating Point Exceptions have been reported + 3,166,621,827 cycles # 2.612 GHz + 6,792,444,940 instructions # 2.15 insn per cycle + 1.212802697 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:49012) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731568543797E-004 +Relative difference = 2.3668012430631962e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
+EvtsPerSec[Rmb+ME] (23) = ( 1.652877e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.659885e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.659885e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 0.997146 sec +INFO: No Floating Point Exceptions have been reported + 2,625,468,482 cycles # 2.624 GHz + 5,970,509,824 instructions # 2.27 insn per cycle + 1.001249505 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42601) (512y: 11) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731568543797E-004 +Relative difference = 2.3668012430631962e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.322992e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.327409e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.327409e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.244528 sec +INFO: No Floating Point Exceptions have been reported + 2,076,691,772 cycles # 1.664 GHz + 3,494,505,327 instructions # 1.68 insn per cycle + 1.248709350 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5207) (512y: 3) (512z:44836) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627275e-04 +Avg ME (F77/C++) = 6.6272750237027223E-004 +Relative difference = 3.5765412974815996e-09 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index 1bb36dc113..c027e74779 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:12:21 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:40:08 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe: Floating point exception - 1,385,776,268 cycles:u # 2.833 GHz (76.08%) - 2,351,309 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.66%) - 7,512,705 stalled-cycles-backend:u # 0.54% backend cycles idle (74.89%) - 1,806,674,893 instructions:u # 1.30 insn per cycle - # 0.00 stalled cycles per insn (74.70%) - 0.525743522 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.562972e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.598287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.601913e+05 
) sec^-1 +MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.492421 sec +INFO: No Floating Point Exceptions have been reported + 2,010,223,902 cycles # 2.826 GHz + 3,031,193,233 instructions # 1.51 insn per cycle + 0.770287796 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.689601e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.749985e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.752940e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.732807 sec +INFO: No Floating Point Exceptions have been reported + 5,640,346,322 cycles # 2.872 GHz + 11,210,275,869 instructions # 1.99 insn per cycle + 2.022037581 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262669162351490E-004 +Relative difference = 2.8232862531213374e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe: Floating point exception - 13,316,566,072 cycles:u # 3.391 GHz (75.04%) - 3,187,894 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.16%) - 6,509,743 stalled-cycles-backend:u # 0.05% backend cycles idle (75.12%) - 12,143,279,403 instructions:u # 0.91 insn per cycle - # 0.00 stalled cycles per 
insn (74.90%) - 3.961298270 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 5.446094e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.446837e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.446837e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 30.118401 sec +INFO: No Floating Point Exceptions have been reported + 86,113,084,692 cycles # 2.859 GHz + 135,363,065,912 instructions # 1.57 insn per cycle + 30.122446956 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15471) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 6.6271025600481842E-004 - File "", line 1 - me1=; me2=6.6271025600481842E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627535e-04 +Avg ME (F77/C++) = 6.6275349662128086E-004 +Relative difference = 5.098002770919431e-09 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] 
[hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.516652e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.527742e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.527742e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.521798 sec +INFO: No Floating Point Exceptions have been reported + 6,856,870,344 cycles # 2.715 GHz + 19,407,796,379 instructions # 2.83 insn per cycle + 2.529187527 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69622) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274862799683282E-004 +Relative difference = 4.2243518621014775e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.378784e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.383778e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.383778e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.194647 sec +INFO: No Floating Point Exceptions have been reported + 3,106,911,149 cycles # 2.593 GHz + 6,716,375,817 instructions # 2.16 insn per cycle + 1.199018593 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2:47699) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731623419345E-004 +Relative difference = 2.449603850635964e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] 
(23) = ( 1.633831e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.642301e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.642301e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.009236 sec +INFO: No Floating Point Exceptions have been reported + 2,628,290,758 cycles # 2.601 GHz + 5,969,462,739 instructions # 2.27 insn per cycle + 1.017917591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41882) (512y: 13) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731623419345E-004 +Relative difference = 2.449603850635964e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 
1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.325974e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.330533e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.330533e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.242092 sec +INFO: No Floating Point Exceptions have been reported + 2,077,381,824 cycles # 1.674 GHz + 3,490,865,426 instructions # 1.68 insn per cycle + 1.248861709 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4171) (512y: 4) (512z:44487) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627275e-04 +Avg ME (F77/C++) = 6.6272750247886592E-004 +Relative difference = 3.740400032174438e-09 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index f0a09b8c50..f1d40dff2c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,51 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: 
Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:04:25 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:17:52 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception - 1,687,819,594 cycles:u # 3.013 GHz (74.16%) - 2,553,938 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.50%) - 7,130,585 stalled-cycles-backend:u # 0.42% backend cycles idle (75.69%) - 1,978,130,748 instructions:u # 1.17 insn per cycle - # 0.00 stalled cycles per insn (75.74%) - 0.597897044 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.307071e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.337162e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.339154e+05 
) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.532508 sec +INFO: No Floating Point Exceptions have been reported + 2,203,526,312 cycles # 2.869 GHz + 3,467,986,959 instructions # 1.57 insn per cycle + 0.824379177 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception - 22,883,618,037 cycles:u # 3.431 GHz (74.93%) - 3,644,122 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.94%) - 6,582,753 stalled-cycles-backend:u # 0.03% backend cycles idle (74.98%) - 20,394,827,984 instructions:u # 0.89 insn per cycle - # 0.00 stalled cycles per insn (75.02%) - 6.706756716 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.133497e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.164330e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.165560e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 
3.045403 sec +INFO: No Floating Point Exceptions have been reported + 9,651,769,916 cycles # 2.918 GHz + 21,560,396,285 instructions # 2.23 insn per cycle + 3.363407224 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -53,11 +85,174 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 - File "", line 1 - me1=; me2=6.6266732376103494E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); 
ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +Relative difference = 2.659538381540814e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.856718e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.857596e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.857596e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.839571 sec +INFO: No Floating Point Exceptions have been reported + 25,916,613,183 cycles # 2.931 GHz + 79,423,792,934 instructions # 3.06 insn per cycle + 8.843857471 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4775) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731406016235E-004 +Relative difference = 2.8059296349552523e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.495399e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.498545e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.498545e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.699126 sec +INFO: No Floating Point Exceptions have been reported + 12,847,395,150 cycles # 2.733 GHz + 38,826,102,030 instructions # 3.02 insn per cycle + 4.703180057 seconds time elapsed +=Symbols 
in CPPProcess_cpp.o= (~sse4:13173) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730246908442E-004 +Relative difference = 2.98084507782618e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] 
(23) = ( 8.037182e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.053225e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.053225e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.047520 sec +INFO: No Floating Point Exceptions have been reported + 5,598,661,180 cycles # 2.730 GHz + 13,618,631,873 instructions # 2.43 insn per cycle + 2.051512013 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11427) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 
1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.221665e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.243251e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.243251e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.785642 sec +INFO: No Floating Point Exceptions have been reported + 4,865,374,839 cycles # 2.720 GHz + 12,297,660,832 instructions # 2.53 insn per cycle + 1.789585857 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10331) (512y: 79) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.872514e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.884400e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.884400e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.393483 sec +INFO: No Floating Point Exceptions have been reported + 4,171,721,525 cycles # 1.741 GHz + 6,391,185,056 instructions # 1.53 insn per cycle + 2.397568985 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1980) (512y: 93) (512z: 9360) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index 8dc3670619..ae3635632d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,51 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-17_09:04:35 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:18:27 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception - 1,602,679,069 cycles:u # 2.961 GHz (75.53%) - 2,397,976 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.22%) - 6,616,527 stalled-cycles-backend:u # 0.41% backend cycles idle (75.57%) - 2,010,182,080 instructions:u # 1.25 insn per cycle - # 0.00 stalled cycles per insn (74.57%) - 0.577145300 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK 
+FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.320961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.350573e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.352533e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.532217 sec +INFO: No Floating Point Exceptions have been reported + 2,207,479,579 cycles # 2.873 GHz + 3,464,148,832 instructions # 1.57 insn per cycle + 0.824297603 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception - 22,138,381,754 cycles:u # 3.427 GHz (74.99%) - 3,614,153 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.99%) - 7,061,353 stalled-cycles-backend:u # 0.03% backend cycles idle (75.06%) - 19,722,872,230 instructions:u # 0.89 insn per cycle - # 0.00 stalled cycles per insn (75.10%) - 6.495291149 seconds time elapsed +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 
4.148990e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.180422e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.181658e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.035739 sec +INFO: No Floating Point Exceptions have been reported + 9,612,803,881 cycles # 2.915 GHz + 20,074,302,744 instructions # 2.09 insn per cycle + 3.353532451 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -53,11 +85,174 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 
64 2 +Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 - File "", line 1 - me1=; me2=6.6266732376103494E-004; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +Relative difference = 2.659538381540814e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.831511e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.832364e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.832364e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.961804 sec +INFO: No Floating Point Exceptions have been reported + 26,010,493,082 cycles # 2.902 GHz + 79,449,384,960 instructions # 3.05 insn per cycle + 8.965752302 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4431) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731406016235E-004 +Relative difference = 2.8059296349552523e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.477024e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.480127e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.480127e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.723753 sec +INFO: No Floating Point Exceptions have been reported + 12,826,084,303 cycles # 2.714 GHz + 
38,778,289,694 instructions # 3.02 insn per cycle + 4.727826379 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12935) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730246908442E-004 +Relative difference = 2.98084507782618e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.051767e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.067821e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.067821e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.043694 sec +INFO: No Floating Point Exceptions have been reported + 5,591,778,218 cycles # 2.733 GHz + 13,733,552,430 instructions # 2.46 insn per cycle + 2.047665232 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11510) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.123898e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.144357e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.144357e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.804403 sec +INFO: No Floating Point Exceptions have been reported + 4,951,573,094 cycles # 2.739 GHz + 12,422,632,916 instructions # 2.51 insn per cycle + 1.808331695 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10322) (512y: 239) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.884699e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.896731e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.896731e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.389141 sec +INFO: No Floating Point Exceptions have been reported + 4,181,828,175 cycles # 1.750 GHz + 6,496,177,989 instructions # 1.55 insn per cycle + 2.393377398 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1803) (512y: 191) (512z: 9369) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 8256ec0032..407fbbe6c0 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-17_09:05:22 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:20:21 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.059284e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.059685e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.059813e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.451621 sec +INFO: No Floating Point Exceptions have been reported + 8,080,887,114 cycles # 2.907 GHz + 16,734,437,330 instructions # 2.07 insn per cycle + 2.836211679 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.254596e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.256737e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.256950e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 4.020216 sec +INFO: No Floating Point Exceptions have been reported + 12,719,492,672 cycles # 2.923 GHz + 29,448,097,640 instructions # 2.32 insn per cycle + 4.407436029 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.193686e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.193730e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.193730e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.437795 sec -INFO: No Floating Point Exceptions have been reported - 15,506,555,900 cycles:u # 3.500 GHz (74.95%) - 10,366,372 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.90%) - 1,555,120,464 stalled-cycles-backend:u # 10.03% backend cycles idle (74.93%) - 53,488,558,564 instructions:u # 3.45 insn per cycle - # 0.03 stalled cycles per insn (75.02%) - 4.444923149 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44571) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.610103e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.610301e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.610301e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.935255 sec +INFO: No Floating Point Exceptions have been reported + 18,974,774,871 cycles # 2.735 GHz + 53,899,721,094 instructions # 2.84 insn per cycle + 6.939338261 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.301383e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.301519e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.301519e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.296195 sec -INFO: No Floating Point Exceptions have been reported - 8,049,162,022 cycles:u # 3.501 GHz (75.07%) - 39,350,066 stalled-cycles-frontend:u # 0.49% frontend cycles idle (74.97%) - 775,648,333 stalled-cycles-backend:u # 9.64% backend cycles idle (74.95%) - 27,055,593,072 instructions:u # 3.36 insn per cycle - # 0.03 stalled cycles per insn (74.95%) - 2.303653868 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95842) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.579226e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.579318e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.579318e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.345304 sec +INFO: No Floating Point Exceptions have been reported + 9,800,813,517 cycles # 2.927 GHz + 27,149,189,789 instructions # 2.77 insn per cycle + 3.349514409 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.163026e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.163527e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.163527e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.024913 sec -INFO: No Floating Point Exceptions have been reported - 3,595,500,755 cycles:u # 3.497 GHz (74.89%) - 1,581,611 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.11%) - 279,683,618 stalled-cycles-backend:u # 7.78% backend cycles idle (75.10%) - 9,554,292,818 instructions:u # 2.66 insn per cycle - # 0.03 stalled cycles per insn (75.10%) - 1.042223630 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83781) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.366336e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.366803e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.366803e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.570292 sec +INFO: No Floating Point Exceptions have been reported + 4,287,053,926 cycles # 2.724 GHz + 9,590,127,631 instructions # 2.24 insn per cycle + 1.574599019 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285459444E-003 -Relative difference = 3.5163711246052657e-07 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.904765e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.905290e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.905290e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.353979 sec +INFO: No Floating Point Exceptions have been reported + 3,709,436,689 cycles # 2.733 GHz + 8,514,247,183 instructions # 2.30 insn per cycle + 1.357880276 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 89) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.407683e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.408196e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.408196e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.552623 sec 
+INFO: No Floating Point Exceptions have been reported + 2,699,560,921 cycles # 1.736 GHz + 4,280,862,154 instructions # 1.59 insn per cycle + 1.556608026 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2853) (512y: 103) (512z:79114) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index f29745a6a4..e032151033 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ 
b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,35 +19,96 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-17_09:17:48 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:49:15 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.054597e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.057500e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.057500e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.436820 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 8,044,156,582 cycles # 2.913 GHz + 18,167,469,518 instructions # 2.26 insn per cycle + 2.819990438 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.188185e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.221546e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.221546e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 4.024442 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 12,704,137,155 cycles # 2.918 GHz + 27,467,799,669 instructions # 2.16 insn per cycle + 4.411963692 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.208280e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.208318e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.208318e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.374666 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 15,322,456,911 cycles:u # 3.500 GHz (74.97%) - 7,524,722 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.96%) - 1,567,010,890 stalled-cycles-backend:u # 10.23% backend cycles idle (74.96%) - 53,484,034,623 instructions:u # 3.49 insn per cycle - # 0.03 stalled cycles per insn (74.97%) - 4.382022884 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44571) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.352982e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.353176e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.353176e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 7.182913 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 19,533,134,643 cycles # 2.719 GHz + 53,904,822,620 instructions # 2.76 insn per cycle + 7.186820393 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -55,36 +116,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.274898e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.275032e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.275032e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.321252 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 8,133,520,976 cycles:u # 3.500 GHz (74.90%) - 79,472,583 stalled-cycles-frontend:u # 0.98% frontend cycles idle (74.87%) - 864,701,293 stalled-cycles-backend:u # 10.63% backend cycles idle (74.87%) - 27,087,071,552 instructions:u # 3.33 insn per cycle - # 0.03 stalled cycles per insn (74.90%) - 2.328456266 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95842) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.583220e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.583307e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.583307e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.337581 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 9,779,129,351 cycles # 2.927 GHz + 27,151,664,900 instructions # 2.78 insn per cycle + 3.341583664 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -92,36 +150,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.071395e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.071828e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.071828e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.042998 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,656,725,861 cycles:u # 3.496 GHz (74.85%) - 23,585,030 stalled-cycles-frontend:u # 0.64% frontend cycles idle (74.77%) - 308,692,447 stalled-cycles-backend:u # 8.44% backend cycles idle (74.76%) - 9,557,086,165 instructions:u # 2.61 insn per cycle - # 0.03 stalled cycles per insn (74.77%) - 1.051124543 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83781) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.365450e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.365854e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.365854e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.570834 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,275,074,655 cycles # 2.716 GHz + 9,592,294,661 instructions # 2.24 insn per cycle + 1.574792391 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -129,16 +184,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285459444E-003 -Relative difference = 3.5163711246052657e-07 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.876513e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.877048e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.877048e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.364090 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,720,013,902 cycles # 2.721 GHz + 8,517,094,572 instructions # 2.29 insn per cycle + 1.368386654 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 89) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.420108e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.420617e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.420617e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.546895 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,698,104,238 cycles # 1.741 GHz + 4,283,566,876 instructions # 1.59 insn per cycle + 1.551097954 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2853) (512y: 103) (512z:79114) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 4195920c5e..71b1803a4d 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-17_09:06:00 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:21:49 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.055075e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.055529e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.055650e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.454190 sec +INFO: No Floating Point Exceptions have been reported + 8,101,270,896 cycles # 2.912 GHz + 18,320,414,768 instructions # 2.26 insn per cycle + 2.837550341 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.224205e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.226444e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.226692e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 4.029157 sec +INFO: No Floating Point Exceptions have been reported + 12,695,828,795 cycles # 2.910 GHz + 28,709,503,011 instructions # 2.26 insn per cycle + 4.420420636 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.189589e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.189626e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.189626e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.440884 sec -INFO: No Floating Point Exceptions have been reported - 15,571,050,388 cycles:u # 3.504 GHz (75.05%) - 4,077,492 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.99%) - 1,637,764,291 stalled-cycles-backend:u # 10.52% backend cycles idle (74.98%) - 53,466,110,925 instructions:u # 3.43 insn per cycle - # 0.03 stalled cycles per insn (74.98%) - 4.448720346 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44484) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.201824e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.202080e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.202080e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.438167 sec +INFO: No Floating Point Exceptions have been reported + 18,843,132,149 cycles # 2.926 GHz + 53,928,570,497 instructions # 2.86 insn per cycle + 6.442267111 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32022) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.350282e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.350421e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.350421e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.248138 sec -INFO: No Floating Point Exceptions have been reported - 7,882,038,590 cycles:u # 3.502 GHz (74.88%) - 1,475,397 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.78%) - 752,175,610 stalled-cycles-backend:u # 9.54% backend cycles idle (74.88%) - 27,060,670,580 instructions:u # 3.43 insn per cycle - # 0.03 stalled cycles per insn (75.06%) - 2.255285871 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95581) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.562611e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.562704e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.562704e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.380180 sec +INFO: No Floating Point Exceptions have been reported + 9,918,861,148 cycles # 2.932 GHz + 27,128,280,341 instructions # 2.74 insn per cycle + 3.383996000 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96368) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.109296e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.109761e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.109761e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.034695 sec -INFO: No Floating Point Exceptions have been reported - 3,617,489,803 cycles:u # 3.487 GHz (74.56%) - 24,486,372 stalled-cycles-frontend:u # 0.68% frontend cycles idle (74.80%) - 320,757,347 stalled-cycles-backend:u # 8.87% backend cycles idle (75.19%) - 9,566,966,571 instructions:u # 2.64 insn per cycle - # 0.03 stalled cycles per insn (75.33%) - 1.042060292 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83752) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.368711e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.369114e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.369114e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.568940 sec +INFO: No Floating Point Exceptions have been reported + 4,289,720,535 cycles # 2.728 GHz + 9,584,928,513 instructions # 2.23 insn per cycle + 1.573132113 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84968) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285459444E-003 -Relative difference = 3.5163711246052657e-07 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.874256e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.874798e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.874798e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.364932 sec +INFO: No Floating Point Exceptions have been reported + 3,728,944,037 cycles # 2.726 GHz + 8,507,330,131 instructions # 2.28 insn per cycle + 1.368786926 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80632) (512y: 239) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.414224e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.414743e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.414743e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.549013 sec 
+INFO: No Floating Point Exceptions have been reported + 2,698,122,905 cycles # 1.738 GHz + 4,280,648,246 instructions # 1.59 insn per cycle + 1.553090413 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 185) (512z:79098) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 94035a1c5a..26694465db 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ 
-1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-17_09:06:37 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:23:17 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.208704e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.209632e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.209859e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.761935 sec 
+INFO: No Floating Point Exceptions have been reported + 5,908,004,381 cycles # 2.901 GHz + 11,686,361,328 instructions # 1.98 insn per cycle + 2.093948305 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.102338e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.102897e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.103014e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 +TOTAL : 2.075560 sec +INFO: No Floating Point Exceptions have been reported + 6,795,219,354 cycles # 2.902 GHz + 14,967,758,240 instructions # 2.20 insn per cycle + 2.398428041 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849635e-03 +Avg ME (F77/GPU) = 9.8712451931260159E-003 +Relative difference = 0.0021940095370046923 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.098477e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.098501e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.098501e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 -TOTAL : 4.808590 sec -INFO: No Floating Point Exceptions have been reported - 16,860,952,832 cycles:u # 3.505 GHz (74.91%) - 104,207,065 stalled-cycles-frontend:u # 0.62% frontend cycles idle (74.91%) - 1,799,771,021 stalled-cycles-backend:u # 10.67% backend cycles idle (74.99%) - 54,159,229,986 instructions:u # 3.21 insn per cycle - # 0.03 stalled cycles per insn (75.06%) - 4.816228082 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:33073) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.563117e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.563375e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.563375e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.168224 sec +INFO: No Floating Point Exceptions have been reported + 18,106,019,929 cycles # 2.934 GHz + 53,907,716,361 instructions # 2.98 insn per cycle + 6.172403776 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855168e-03 -Avg ME (F77/C++) = 9.8551676614203575E-003 -Relative difference = 3.4355542366580335e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847961e-03 +Avg ME (F77/C++) = 9.8479612087551509E-003 +Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.942387e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.942898e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.942898e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 -TOTAL : 1.070162 sec -INFO: No Floating Point Exceptions have been reported - 3,739,406,171 cycles:u # 3.485 GHz (74.65%) - 654,362 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.56%) - 370,462,939 stalled-cycles-backend:u # 9.91% backend cycles idle (74.94%) - 13,768,829,664 instructions:u # 3.68 insn per cycle - # 0.03 stalled cycles per insn (75.40%) - 1.077878306 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95933) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.366569e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.366962e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.366962e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.570259 sec +INFO: No Floating Point Exceptions have been reported + 4,597,646,888 cycles # 2.923 GHz + 13,807,163,752 instructions # 3.00 insn per cycle + 1.574045592 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855164e-03 -Avg ME (F77/C++) = 9.8551639361110794E-003 -Relative difference = 6.48278610035626e-09 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896367235E-003 +Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.010053e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.010205e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.010205e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 -TOTAL : 0.524427 sec -INFO: No Floating Point Exceptions have been reported - 1,837,704,710 cycles:u # 3.485 GHz (74.34%) - 19,085,726 stalled-cycles-frontend:u # 1.04% frontend cycles idle (74.22%) - 161,785,061 stalled-cycles-backend:u # 8.80% backend cycles idle (74.48%) - 4,826,428,263 instructions:u # 2.63 insn per cycle - # 0.03 stalled cycles per insn (75.24%) - 0.532111325 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84347) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.801272e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.802916e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.802916e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.778379 sec +INFO: No Floating Point Exceptions have been reported + 2,130,043,758 cycles # 2.726 GHz + 4,836,599,174 instructions # 2.27 insn per cycle + 0.782206721 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.836478e-03 -Avg ME (F77/C++) = 9.8364784946823516E-003 -Relative difference = 5.0290597139820844e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091246E-003 +Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.682520e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.684604e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.684604e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.688940 sec +INFO: No Floating Point Exceptions have been reported + 1,884,507,725 cycles # 2.723 GHz + 4,290,819,235 instructions # 2.28 insn per cycle + 0.692749981 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81185) (512y: 44) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091246E-003 +Relative difference = 1.8588029579156084e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.875530e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.877565e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.877565e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.770700 sec 
+INFO: No Floating Point Exceptions have been reported + 1,352,613,897 cycles # 1.747 GHz + 2,162,405,721 instructions # 1.60 insn per cycle + 0.774947088 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3479) (512y: 47) (512z:79330) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982676284E-003 +Relative difference = 2.004124217057488e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index fd31d9982c..8e4037314e 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ 
b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,35 +19,96 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-17_09:18:26 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:50:44 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.261572e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.268191e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.268191e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187093e-05 +- 9.825663e-06 ) GeV^-6 +TOTAL : 1.738260 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,820,188,860 cycles # 2.913 GHz + 12,502,480,728 instructions # 2.15 insn per cycle + 2.056507800 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.148842e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.160493e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.160493e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856440e-04 +- 8.331091e-05 ) GeV^-6 +TOTAL : 2.045649 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,753,066,902 cycles # 2.917 GHz + 14,813,097,918 instructions # 2.19 insn per cycle + 2.374262766 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849635e-03 +Avg ME (F77/GPU) = 9.8712451931260159E-003 +Relative difference = 0.0021940095370046923 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.100021e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.100042e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.100042e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 -TOTAL : 4.802584 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 16,836,387,718 cycles:u # 3.504 GHz (74.91%) - 102,891,510 stalled-cycles-frontend:u # 0.61% frontend cycles idle (75.01%) - 1,786,741,578 stalled-cycles-backend:u # 10.61% backend cycles idle (75.03%) - 54,206,960,406 instructions:u # 3.22 insn per cycle - # 0.03 stalled cycles per insn (75.03%) - 4.810260994 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:33073) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.502637e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.502889e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.502889e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.211468 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 18,143,644,137 cycles # 2.921 GHz + 53,909,939,321 instructions # 2.97 insn per cycle + 6.215559174 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -55,36 +116,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855168e-03 -Avg ME (F77/C++) = 9.8551676614203575E-003 -Relative difference = 3.4355542366580335e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847961e-03 +Avg ME (F77/C++) = 9.8479612087551509E-003 +Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.937626e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.938053e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.938053e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 -TOTAL : 1.071536 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,755,091,069 cycles:u # 3.495 GHz (74.77%) - 342,975 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.69%) - 345,549,721 stalled-cycles-backend:u # 9.20% backend cycles idle (74.69%) - 13,780,342,259 instructions:u # 3.67 insn per cycle - # 0.03 stalled cycles per insn (74.86%) - 1.079071136 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95933) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.339632e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.340031e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.340031e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.583158 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,644,642,721 cycles # 2.928 GHz + 13,808,855,992 instructions # 2.97 insn per cycle + 1.587116749 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -92,36 +150,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855164e-03 -Avg ME (F77/C++) = 9.8551639361110794E-003 -Relative difference = 6.48278610035626e-09 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896367235E-003 +Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.035090e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.035254e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.035254e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 -TOTAL : 0.512549 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,798,851,477 cycles:u # 3.489 GHz (74.84%) - 814,547 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.18%) - 169,382,973 stalled-cycles-backend:u # 9.42% backend cycles idle (75.18%) - 4,813,565,737 instructions:u # 2.68 insn per cycle - # 0.04 stalled cycles per insn (75.18%) - 0.519968158 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84347) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.786207e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.787843e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.787843e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.780530 sec +INFO: No Floating Point 
Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,130,238,055 cycles # 2.718 GHz + 4,838,587,482 instructions # 2.27 insn per cycle + 0.784611119 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -129,16 +184,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.836478e-03 -Avg ME (F77/C++) = 9.8364784946823516E-003 -Relative difference = 5.0290597139820844e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091246E-003 +Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.698223e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.700507e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.700507e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.688171 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,885,276,472 cycles # 2.726 GHz + 4,293,094,440 instructions # 2.28 insn per cycle + 0.692122848 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81185) (512y: 44) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091246E-003 +Relative difference = 1.8588029579156084e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.810636e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.812720e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.812720e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.777981 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,355,130,660 cycles # 1.735 GHz + 2,164,600,762 instructions # 1.60 insn per cycle + 0.782043846 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3479) (512y: 47) (512z:79330) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982676284E-003 +Relative difference = 2.004124217057488e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 84d6dbe34f..113bcaacf7 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-17_09:07:08 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:24:20 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.202287e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.203031e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.203251e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.759878 sec +INFO: No Floating Point Exceptions have been reported + 5,921,315,429 cycles # 2.907 GHz + 12,451,469,321 instructions # 2.10 insn per cycle + 2.095417433 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.113173e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.113784e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.113870e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 +TOTAL : 2.088774 sec +INFO: No Floating Point Exceptions have been reported + 6,829,574,271 cycles # 2.905 GHz + 14,898,722,914 instructions # 2.18 insn per cycle + 2.410171422 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849635e-03 +Avg ME (F77/GPU) = 9.8712451931260107E-003 +Relative difference = 0.0021940095370041636 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.091252e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.091274e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.091274e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 -TOTAL : 4.841255 sec -INFO: No Floating Point Exceptions have been reported - 16,949,591,621 cycles:u # 3.499 GHz (74.91%) - 105,838,515 stalled-cycles-frontend:u # 0.62% frontend cycles idle (74.92%) - 1,813,583,312 stalled-cycles-backend:u # 10.70% backend cycles idle (75.00%) - 54,172,544,496 instructions:u # 3.20 insn per cycle - # 0.03 stalled cycles per insn (75.06%) - 4.848693160 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:33154) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.526309e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.526569e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.526569e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.192756 sec +INFO: No Floating Point Exceptions have been reported + 18,135,421,902 cycles # 2.927 GHz + 53,892,650,631 instructions # 2.97 insn per cycle + 6.196840431 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855168e-03 -Avg ME (F77/C++) = 9.8551676614199186E-003 -Relative difference = 3.435558690007174e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847961e-03 +Avg ME (F77/C++) = 9.8479612087572898E-003 +Relative difference = 2.1198021522715588e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.961056e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.961517e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.961517e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 -TOTAL : 1.066638 sec -INFO: No Floating Point Exceptions have been reported - 3,731,151,584 cycles:u # 3.489 GHz (74.62%) - 318,149 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.59%) - 360,595,962 stalled-cycles-backend:u # 9.66% backend cycles idle (74.87%) - 13,778,695,801 instructions:u # 3.69 insn per cycle - # 0.03 stalled cycles per insn (75.24%) - 1.074783050 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95973) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.396709e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.397124e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.397124e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.556067 sec +INFO: No Floating Point Exceptions have been reported + 4,573,398,855 cycles # 2.934 GHz + 13,800,378,388 instructions # 3.02 insn per cycle + 1.559827589 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96651) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855164e-03 -Avg ME (F77/C++) = 9.8551639361110794E-003 -Relative difference = 6.48278610035626e-09 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896065809E-003 +Relative difference = 3.151856596628469e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.031957e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.032121e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.032121e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 -TOTAL : 0.513718 sec -INFO: No Floating Point Exceptions have been reported - 1,806,685,149 cycles:u # 3.497 GHz (74.32%) - 875,735 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.16%) - 150,213,316 stalled-cycles-backend:u # 8.31% backend cycles idle (75.23%) - 4,822,283,812 instructions:u # 2.67 insn per cycle - # 0.03 stalled cycles per insn (75.23%) - 0.521477439 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84309) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.651495e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.653049e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.653049e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.795401 sec +INFO: No Floating Point Exceptions have been reported + 2,148,860,867 cycles # 2.691 GHz + 4,840,602,339 instructions # 2.25 insn per cycle + 0.799229981 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85884) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.836478e-03 -Avg ME (F77/C++) = 9.8364784946823516E-003 -Relative difference = 5.0290597139820844e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091923E-003 +Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.688407e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.690576e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.690576e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.688541 sec +INFO: No Floating Point Exceptions have been reported + 1,890,706,185 cycles # 2.733 GHz + 4,294,394,779 instructions # 2.27 insn per cycle + 0.692328039 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81725) (512y: 24) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091923E-003 +Relative difference = 1.85880227405429e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.826093e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.828148e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.828148e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.775828 sec 
+INFO: No Floating Point Exceptions have been reported + 1,357,390,482 cycles # 1.742 GHz + 2,169,212,126 instructions # 1.60 insn per cycle + 0.779795742 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4092) (512y: 32) (512z:79551) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982957326E-003 +Relative difference = 2.0044082998332894e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 75711f9d04..2e59aa2257 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ 
-1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-17_09:07:39 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:25:23 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.663841e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.664390e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.664590e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.203630 sec +INFO: No Floating Point Exceptions have been reported + 7,260,397,959 cycles # 2.866 GHz + 15,031,707,879 instructions # 2.07 insn per 
cycle + 2.589013700 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.107763e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.108067e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108098e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.442022 sec +INFO: No Floating Point Exceptions have been reported + 10,932,120,354 cycles # 2.895 GHz + 24,906,946,249 instructions # 2.28 insn per cycle + 3.831975982 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.190124e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.190161e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.190161e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.438246 sec -INFO: No Floating Point Exceptions have been reported - 15,526,840,838 cycles:u # 3.497 GHz (74.98%) - 2,952,770 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.97%) - 1,504,887,667 stalled-cycles-backend:u # 9.69% backend cycles idle (74.97%) - 53,733,477,093 instructions:u # 3.46 insn per cycle - # 0.03 stalled cycles per insn (74.96%) - 4.446093513 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44590) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.516129e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.516327e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.516327e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 7.025685 sec +INFO: No Floating Point Exceptions have been reported + 19,256,305,943 cycles # 2.740 GHz + 54,130,622,749 instructions # 2.81 insn per cycle + 7.029878997 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32000) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.474679e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.474835e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.474835e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.135568 sec -INFO: No Floating Point Exceptions have been reported - 7,481,522,304 cycles:u # 3.499 GHz (74.95%) - 2,226,802 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.94%) - 772,729,863 stalled-cycles-backend:u # 10.33% backend cycles idle (74.94%) - 25,864,411,572 instructions:u # 3.46 insn per cycle - # 0.03 stalled cycles per insn (74.94%) - 2.142734106 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95377) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.524890e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.524973e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.524973e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.464849 sec +INFO: No Floating Point Exceptions have been reported + 9,453,784,509 cycles # 2.726 GHz + 26,186,103,091 instructions # 2.77 insn per cycle + 3.468732831 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96049) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.285089e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.285622e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.285622e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.000009 sec -INFO: No Floating Point Exceptions have been reported - 3,489,273,672 cycles:u # 3.480 GHz (74.51%) - 47,806,415 stalled-cycles-frontend:u # 1.37% frontend cycles idle (74.71%) - 340,340,818 stalled-cycles-backend:u # 9.75% backend cycles idle (75.11%) - 9,098,277,117 instructions:u # 2.61 insn per cycle - # 0.04 stalled cycles per insn (75.28%) - 1.007958406 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:82824) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.508306e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.508754e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508754e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.507182 sec +INFO: No Floating Point Exceptions have been reported + 4,099,795,192 cycles # 2.715 GHz + 9,249,955,249 instructions # 2.26 insn per cycle + 1.510975685 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.116819e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.117442e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.117442e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.284411 sec +INFO: No Floating Point Exceptions have been reported + 3,509,716,252 cycles # 2.725 GHz + 8,182,475,258 instructions # 2.33 insn per cycle + 1.288638878 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80015) (512y: 79) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.462021e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.462537e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.462537e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.527279 sec +INFO: No Floating Point Exceptions have been reported + 2,661,319,941 cycles # 1.739 GHz + 4,172,569,565 instructions # 1.57 insn per cycle + 1.531717386 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2612) (512y: 93) (512z:78910) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index fc5355db1b..f2e4a2151c 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-17_09:08:16 +make: Nothing to be done for 'all'. + +DATE: 2024-09-15_11:26:49 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.668216e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.668742e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.668891e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.204926 sec +INFO: No Floating Point Exceptions have been reported + 7,354,907,186 cycles # 2.903 GHz + 15,835,326,846 instructions # 2.15 insn per cycle + 2.589353613 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.111109e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.111413e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111447e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.435805 sec +INFO: No Floating Point Exceptions have been reported + 11,002,728,447 cycles # 2.923 GHz + 25,822,053,923 instructions # 2.35 insn per cycle + 3.822280777 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.194515e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.194553e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.194553e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.422163 sec -INFO: No Floating Point Exceptions have been reported - 15,487,855,703 cycles:u # 3.500 GHz (74.90%) - 5,300,953 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.98%) - 1,623,019,803 stalled-cycles-backend:u # 10.48% backend cycles idle (75.05%) - 53,736,438,153 instructions:u # 3.47 insn per cycle - # 0.03 stalled cycles per insn (75.05%) - 4.429837474 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44515) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.824002e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.824211e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.824211e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.748238 sec +INFO: No Floating Point Exceptions have been reported + 19,286,477,225 cycles # 2.857 GHz + 54,157,907,603 instructions # 2.81 insn per cycle + 6.752432065 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32202) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow 
summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.516087e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.516245e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.516245e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.100570 sec -INFO: No Floating Point Exceptions have been reported - 7,347,023,987 cycles:u # 3.493 GHz (74.90%) - 1,435,300 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.90%) - 816,939,030 stalled-cycles-backend:u # 11.12% backend cycles idle (74.90%) - 25,751,836,706 instructions:u # 3.51 insn per cycle - # 0.03 stalled cycles per insn (74.90%) - 2.107659262 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95039) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.548001e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.548086e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.548086e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.412561 sec +INFO: No Floating Point Exceptions have been reported + 9,302,368,855 cycles # 2.723 GHz + 26,085,336,117 instructions # 2.80 insn per cycle + 3.416771061 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95938) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow 
summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.538499e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.539051e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.539051e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.955297 sec -INFO: No Floating Point Exceptions have been reported - 3,355,645,011 cycles:u # 3.502 GHz (75.15%) - 1,161,133 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.00%) - 304,879,843 stalled-cycles-backend:u # 9.09% backend cycles idle (74.96%) - 9,038,488,166 instructions:u # 2.69 insn per cycle - # 0.03 stalled cycles per insn (74.96%) - 0.962866348 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:82125) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.533570e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.534051e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.534051e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.496264 sec +INFO: No Floating Point Exceptions have been reported + 4,086,923,304 cycles # 2.726 GHz + 9,212,952,806 instructions # 2.25 insn per cycle + 1.500090267 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83864) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.068352e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.068931e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.068931e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.299365 sec +INFO: No Floating Point Exceptions have been reported + 3,513,960,907 cycles # 2.698 GHz + 8,167,668,326 instructions # 2.32 insn per cycle + 1.303235401 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79421) (512y: 229) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.521239e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.521794e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.521794e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.500932 sec +INFO: No Floating Point Exceptions have been reported + 2,617,549,535 cycles # 1.740 GHz + 4,166,941,618 instructions # 1.59 insn per cycle + 1.504880250 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1876) (512y: 175) (512z:78884) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index d8bc134c62..73af5e5b3a 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,96 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: 
Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-17_09:04:45 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:19:01 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault - 765,289,831 cycles:u # 0.783 GHz (75.41%) - 2,386,123 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.52%) - 5,614,304 stalled-cycles-backend:u # 0.73% backend cycles idle (75.33%) - 1,234,817,074 instructions:u # 1.61 insn per cycle - # 0.00 stalled cycles per insn (75.08%) - 1.033292307 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.740481e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.765338e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.886154e+07 ) 
sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.459950 sec +INFO: No Floating Point Exceptions have been reported + 1,934,018,539 cycles # 2.861 GHz + 2,739,518,446 instructions # 1.42 insn per cycle + 0.734446139 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault - 975,011,130 cycles:u # 2.267 GHz (75.50%) - 2,444,951 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.99%) - 11,480,073 stalled-cycles-backend:u # 1.18% backend cycles idle (75.99%) - 1,400,584,923 instructions:u # 1.44 insn per cycle - # 0.01 stalled cycles per insn (75.38%) - 0.483827229 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.975676e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.474629e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.695966e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.539678 sec 
+INFO: No Floating Point Exceptions have been reported + 2,257,806,163 cycles # 2.877 GHz + 3,239,125,642 instructions # 1.43 insn per cycle + 0.841027050 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Memory access fault by GPU node-4 (Agent handle: 0x6923260) on address 0x148aab185000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x148d41f302e2 in ??? -#1 0x148d41f2f475 in ??? -#2 0x148d40254dbf in ??? -#3 0x148d40254d2b in ??? -#4 0x148d402563e4 in ??? -#5 0x148d37bd0d1b in ??? -#6 0x148d37bcabc8 in ??? -#7 0x148d37b7c9e6 in ??? -#8 0x148d37b486e9 in ??? -#9 0x148d4032250e in ??? -#10 0xffffffffffffffff in ??? -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = -ERROR! 
Fortran calculation (F77/GPU) crashed +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.450385e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.479153e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.479153e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.153489 sec -INFO: No Floating Point Exceptions have been reported - 4,044,738,022 cycles:u # 3.498 GHz (74.84%) - 2,922,470 stalled-cycles-frontend:u # 0.07% 
frontend cycles idle (75.09%) - 830,501,015 stalled-cycles-backend:u # 20.53% backend cycles idle (75.10%) - 13,147,449,578 instructions:u # 3.25 insn per cycle - # 0.06 stalled cycles per insn (75.10%) - 1.160916733 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.056428e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.078475e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.078475e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.570596 sec +INFO: No Floating Point Exceptions have been reported + 4,620,202,435 cycles # 2.935 GHz + 13,190,173,768 instructions # 2.85 insn per cycle + 1.574765138 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -98,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.540097e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.629678e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.629678e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.668727 sec -INFO: No Floating Point Exceptions have been reported - 2,339,388,961 cycles:u # 3.483 GHz (75.06%) - 2,245,463 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.00%) - 554,219,476 stalled-cycles-backend:u # 23.69% backend cycles idle (74.99%) - 7,489,152,435 instructions:u # 3.20 insn per cycle - # 0.07 stalled cycles per insn (74.99%) - 0.676035087 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3007) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.870844e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.942105e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.942105e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.895190 sec +INFO: No Floating Point Exceptions have been reported + 2,640,894,010 cycles # 2.940 GHz + 7,556,112,587 instructions # 2.86 insn per cycle + 0.899078617 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -133,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.760795e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.087556e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.087556e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.368041 sec -INFO: No Floating Point Exceptions have been reported - 1,286,017,437 cycles:u # 3.466 GHz (74.70%) - 1,971,200 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.18%) - 265,343,931 stalled-cycles-backend:u # 20.63% backend cycles idle (74.14%) - 3,082,088,970 instructions:u # 2.40 insn per cycle - # 0.09 stalled cycles per insn (74.19%) - 0.375625105 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2888) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.155420e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.359383e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.359383e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.539606 sec +INFO: No Floating Point Exceptions have been reported + 1,490,717,557 cycles # 2.746 GHz + 3,161,146,919 instructions # 2.12 insn per cycle + 0.543466540 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -168,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.514709e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.763624e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.763624e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.485581 sec +INFO: No Floating Point Exceptions have been reported + 1,345,992,067 cycles # 2.752 GHz + 3,013,895,719 instructions # 2.24 insn per cycle + 0.489750963 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2752) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.329309e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.438411e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.438411e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.725847 sec +INFO: No Floating Point Exceptions have been reported + 1,326,647,346 cycles # 1.820 GHz + 1,963,906,161 instructions # 1.48 insn per cycle + 0.729744934 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 477e20b0a5..87049bf6bc 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,106 +1,133 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-17_09:17:16 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:47:47 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault - 837,267,745 cycles:u # 2.489 GHz (71.08%) - 2,659,462 stalled-cycles-frontend:u # 0.32% frontend cycles idle (76.15%) - 21,721,888 stalled-cycles-backend:u # 2.59% backend cycles idle (76.24%) - 1,236,659,602 instructions:u # 1.48 insn per cycle - # 0.02 stalled cycles per insn (76.32%) - 0.368314009 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.302816e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.642797e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.642797e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.484911 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,993,081,726 cycles # 2.857 GHz + 2,967,605,428 instructions # 1.49 insn per cycle + 0.755486323 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault - 3,162,312,776 cycles:u # 2.918 GHz (73.52%) - 17,166,417 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.58%) - 813,908,886 stalled-cycles-backend:u # 25.74% backend cycles idle (75.99%) - 3,174,485,339 instructions:u # 1.00 insn per cycle - # 0.26 stalled cycles per insn (75.65%) - 1.115704853 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.256871e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.326938e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.326938e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.759153 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,900,800,476 cycles # 2.878 GHz + 
4,476,324,954 instructions # 1.54 insn per cycle + 1.066391830 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Memory access fault by GPU node-4 (Agent handle: 0x6923260) on address 0x14f094d35000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x14f32c0502e2 in ??? -#1 0x14f32c04f475 in ??? -#2 0x14f32a374dbf in ??? -#3 0x14f32a374d2b in ??? -#4 0x14f32a3763e4 in ??? -#5 0x14f321cf0d1b in ??? -#6 0x14f321ceabc8 in ??? -#7 0x14f321c9c9e6 in ??? -#8 0x14f321c686e9 in ??? -#9 0x14f32a44250e in ??? -#10 0xffffffffffffffff in ??? -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = -ERROR! 
Fortran calculation (F77/GPU) crashed +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.448986e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.477760e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.477760e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.158541 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,047,436,102 cycles:u # 3.483 GHz (74.77%) - 3,030,691 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.61%) - 816,402,733 stalled-cycles-backend:u # 20.17% backend cycles idle (74.80%) - 13,152,643,657 instructions:u # 3.25 insn per cycle - # 0.06 stalled cycles per insn (75.10%) - 1.166666605 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.051618e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.074674e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.074674e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.585397 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,659,791,475 cycles # 2.933 GHz + 13,199,729,048 instructions # 2.83 insn per cycle + 1.589552076 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -108,36 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.535706e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.624712e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.624712e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.673486 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,361,808,073 cycles:u # 3.490 GHz (74.66%) - 2,515,451 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.08%) - 550,090,025 stalled-cycles-backend:u # 23.29% backend cycles idle (75.18%) - 7,509,163,852 instructions:u # 3.18 insn per cycle - # 0.07 stalled cycles per insn (75.18%) - 0.680952675 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3007) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.863646e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.935223e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935223e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.905908 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,672,075,267 cycles # 2.939 GHz + 7,605,973,490 instructions # 2.85 insn per cycle + 0.909977972 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -145,36 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.742888e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.066882e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.066882e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.372683 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,300,445,328 cycles:u # 3.459 GHz (74.65%) - 2,012,156 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.49%) - 267,226,121 stalled-cycles-backend:u # 20.55% backend cycles idle (74.48%) - 3,103,504,775 instructions:u # 2.39 insn per cycle - # 0.09 stalled cycles per insn (74.48%) - 0.380188324 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2888) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.113398e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.317707e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.317707e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.554094 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,524,354,103 cycles # 2.734 GHz + 3,211,905,393 instructions # 2.11 insn per cycle + 0.558166519 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -182,16 +203,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.488860e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.737446e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.737446e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.497012 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,381,887,844 cycles # 2.761 GHz + 3,066,710,334 instructions # 2.22 insn per cycle + 0.501143809 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2752) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.170464e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.268423e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.268423e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.785429 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,369,203,746 cycles # 1.799 GHz + 2,005,266,999 instructions # 1.46 insn per cycle + 0.789533436 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index a484bbc168..f184fc3b5e 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,96 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-17_09:04:52 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:19:15 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault - 744,075,935 cycles:u # 2.279 GHz (74.85%) - 2,209,449 stalled-cycles-frontend:u # 0.30% frontend cycles idle (77.83%) - 13,615,528 stalled-cycles-backend:u # 1.83% backend cycles idle (78.09%) - 1,320,674,802 instructions:u # 1.77 insn per cycle - # 0.01 stalled cycles per insn (74.69%) - 0.381986849 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.732857e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.764454e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.875095e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.459022 sec +INFO: No Floating Point Exceptions have been reported + 1,935,515,541 cycles # 2.868 GHz + 2,740,568,582 instructions # 1.42 insn per cycle + 0.732282850 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault - 980,134,192 cycles:u # 2.284 GHz (73.50%) - 2,452,402 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.79%) - 6,620,152 stalled-cycles-backend:u # 0.68% backend cycles idle (75.21%) - 1,521,504,264 instructions:u # 1.55 insn per cycle - # 0.00 stalled cycles per insn (75.56%) - 0.464614226 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.938986e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.388894e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.605968e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.543627 sec +INFO: No Floating Point Exceptions have been reported + 2,240,911,931 cycles # 2.849 GHz + 3,134,508,527 instructions # 1.40 insn per cycle + 0.843804985 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 -Memory access fault by GPU node-4 (Agent handle: 0x69231b0) on address 0x146a934f4000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x146d2a7f72e2 in ??? -#1 0x146d2a7f6475 in ??? -#2 0x146d28b1cdbf in ??? -#3 0x146d28b1cd2b in ??? -#4 0x146d28b1e3e4 in ??? -#5 0x146d20498d1b in ??? -#6 0x146d20492bc8 in ??? -#7 0x146d204449e6 in ??? -#8 0x146d201e66e9 in ??? -#9 0x146d28bea50e in ??? -#10 0xffffffffffffffff in ??? -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = -ERROR! 
Fortran calculation (F77/GPU) crashed +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.437636e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.465805e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.465805e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.163196 sec -INFO: No Floating Point Exceptions have been reported - 4,072,883,263 cycles:u # 3.493 GHz (74.64%) - 2,355,930 stalled-cycles-frontend:u # 0.06% 
frontend cycles idle (74.65%) - 754,409,505 stalled-cycles-backend:u # 18.52% backend cycles idle (74.90%) - 13,147,062,402 instructions:u # 3.23 insn per cycle - # 0.06 stalled cycles per insn (75.22%) - 1.170670702 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 720) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.033389e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.055879e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.055879e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.606028 sec +INFO: No Floating Point Exceptions have been reported + 4,632,143,331 cycles # 2.878 GHz + 13,180,119,009 instructions # 2.85 insn per cycle + 1.610268805 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -98,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.580809e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.672695e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.672695e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.658402 sec -INFO: No Floating Point Exceptions have been reported - 2,302,017,863 cycles:u # 3.480 GHz (74.62%) - 1,833,884 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.60%) - 484,320,149 stalled-cycles-backend:u # 21.04% backend cycles idle (74.61%) - 7,565,887,711 instructions:u # 3.29 insn per cycle - # 0.06 stalled cycles per insn (74.70%) - 0.666091639 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3000) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.829571e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.900649e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.900649e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.915734 sec +INFO: No Floating Point Exceptions have been reported + 2,643,771,941 cycles # 2.877 GHz + 7,554,150,292 instructions # 2.86 insn per cycle + 0.919868185 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -133,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.750495e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.076489e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.076489e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.368907 sec -INFO: No Floating Point Exceptions have been reported - 1,272,039,979 cycles:u # 3.421 GHz (74.22%) - 1,866,996 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.20%) - 291,180,446 stalled-cycles-backend:u # 22.89% backend cycles idle (74.59%) - 3,080,749,092 instructions:u # 2.42 insn per cycle - # 0.09 stalled cycles per insn (75.66%) - 0.377252207 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2873) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.046256e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.248866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.248866e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.558915 sec +INFO: No Floating Point Exceptions have been reported + 1,500,616,577 cycles # 2.669 GHz + 3,161,167,766 instructions # 2.11 insn per cycle + 0.563154837 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2976) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -168,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.429175e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.674618e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.674618e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.498371 sec +INFO: No Floating Point Exceptions have been reported + 1,352,614,614 cycles # 2.696 GHz + 3,013,058,203 instructions # 2.23 insn per cycle + 0.502370936 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2726) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.263352e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.370712e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.370712e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.746831 sec +INFO: No Floating Point Exceptions have been reported + 1,330,812,654 cycles # 1.774 GHz + 1,962,138,478 instructions # 1.47 insn per cycle + 0.751010006 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1356) (512y: 106) (512z: 2218) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 103170de36..9c9085f218 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,96 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-17_09:04:58 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:19:29 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault - 806,546,029 cycles:u # 2.481 GHz (72.64%) - 2,466,010 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.16%) - 5,096,419 stalled-cycles-backend:u # 0.63% backend cycles idle (75.74%) - 1,270,003,199 instructions:u # 1.57 insn per cycle - # 0.00 stalled cycles per insn (76.29%) - 0.364327576 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.616183e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.859144e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.009356e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.455344 sec +INFO: No Floating Point Exceptions have been reported + 1,903,769,704 cycles # 2.832 GHz + 2,695,127,426 instructions # 1.42 insn per cycle + 0.728806465 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault - 929,696,813 cycles:u # 2.348 GHz (75.86%) - 2,392,351 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.86%) - 6,543,076 stalled-cycles-backend:u # 0.70% backend cycles idle (75.78%) - 1,450,864,031 instructions:u # 1.56 insn per cycle - # 0.00 stalled cycles per insn (75.65%) - 0.433195342 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.292895e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.269503e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.615665e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 +TOTAL : 0.493521 sec +INFO: No Floating Point Exceptions have been reported + 2,090,898,444 cycles # 2.835 GHz + 2,942,471,441 instructions # 1.41 insn per cycle + 0.794240657 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Memory access fault by GPU node-4 (Agent handle: 0x6922280) on address 0x14f938b6f000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x14fbcfc222e2 in ??? -#1 0x14fbcfc21475 in ??? -#2 0x14fbcdf47dbf in ??? -#3 0x14fbcdf47d2b in ??? -#4 0x14fbcdf493e4 in ??? -#5 0x14fbc58c3d1b in ??? -#6 0x14fbc58bdbc8 in ??? -#7 0x14fbc586f9e6 in ??? -#8 0x14fbc583b6e9 in ??? -#9 0x14fbce01550e in ??? -#10 0xffffffffffffffff in ??? -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = -ERROR! 
Fortran calculation (F77/GPU) crashed +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247487904286338 +Relative difference = 0.0003670698531228044 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.655944e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.695014e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.695014e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945526e+02 +- 1.186197e+02 ) GeV^-2 -TOTAL : 1.010748 sec -INFO: No Floating Point Exceptions have been reported - 3,547,667,563 cycles:u # 3.500 GHz (74.61%) - 1,729,066 stalled-cycles-frontend:u # 0.05% 
frontend cycles idle (74.75%) - 399,816,523 stalled-cycles-backend:u # 11.27% backend cycles idle (74.75%) - 12,899,393,000 instructions:u # 3.64 insn per cycle - # 0.03 stalled cycles per insn (74.79%) - 1.018109903 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.085317e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.110333e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110333e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.528688 sec +INFO: No Floating Point Exceptions have been reported + 4,411,922,721 cycles # 2.879 GHz + 12,951,312,387 instructions # 2.94 insn per cycle + 1.532844163 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -98,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246858320096933 -Relative difference = 1.1791391693704193e-07 +Avg ME (F77/C++) = 0.14246861273719524 +Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.229685e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.497086e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.497086e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 -TOTAL : 0.408927 sec -INFO: No Floating Point Exceptions have been reported - 1,418,245,736 cycles:u # 3.443 GHz (74.82%) - 1,794,172 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.76%) - 483,575,887 stalled-cycles-backend:u # 34.10% backend cycles idle (74.76%) - 4,298,766,759 instructions:u # 3.03 insn per cycle - # 0.11 stalled cycles per insn (74.76%) - 0.416911790 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3392) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.813599e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.988360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.988360e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.600071 sec +INFO: No Floating Point Exceptions have been reported + 1,729,759,970 cycles # 2.867 GHz + 4,541,750,353 instructions # 2.63 insn per cycle + 0.604044137 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -133,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424687e-01 -Avg ME (F77/C++) = 0.14246865423667998 -Relative difference = 3.2121666037785094e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246862329122401 +Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.525503e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.383078e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.383078e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947131e+02 +- 1.186881e+02 ) GeV^-2 -TOTAL : 0.239138 sec -INFO: No Floating Point Exceptions have been reported - 822,711,784 cycles:u # 3.401 GHz (73.57%) - 4,475,648 stalled-cycles-frontend:u # 0.54% frontend cycles idle (73.70%) - 225,913,723 stalled-cycles-backend:u # 27.46% backend cycles idle (75.01%) - 1,863,586,483 instructions:u # 2.27 insn per cycle - # 0.12 stalled cycles per insn (76.53%) - 0.246741624 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3488) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.481903e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.160699e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.160699e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.317183 sec +INFO: No Floating Point Exceptions have been reported + 858,921,512 cycles # 2.679 GHz + 1,917,766,555 instructions # 2.23 insn per cycle + 0.321171597 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -168,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490118064832 -Relative difference = 8.286711056488833e-09 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.857503e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.629025e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.629025e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.298118 sec +INFO: No Floating Point Exceptions have been reported + 804,518,989 cycles # 2.670 GHz + 1,834,610,739 instructions # 2.28 insn per cycle + 0.301964643 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3402) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.365317e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.786659e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.786659e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.395762 sec +INFO: No Floating Point Exceptions have been reported + 728,663,796 cycles # 1.826 GHz + 1,308,267,192 instructions # 1.80 insn per cycle + 0.399787635 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1963) (512y: 26) (512z: 2434) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491576758442 +Relative difference = 1.1066920862943416e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index 5d6b6aafb9..f23dffbec1 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,106 +1,133 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-17_09:17:23 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:48:01 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault - 851,027,640 cycles:u # 2.562 GHz (72.19%) - 2,738,062 stalled-cycles-frontend:u # 0.32% frontend cycles idle (75.86%) - 21,553,583 stalled-cycles-backend:u # 2.53% backend cycles idle (75.94%) - 1,262,462,005 instructions:u # 1.48 insn per cycle - # 0.02 stalled cycles per insn (76.46%) - 0.364417631 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.986387e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.435739e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.435739e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.017654e+01 +- 1.429183e+01 ) GeV^-2 +TOTAL : 0.462861 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,938,742,838 cycles # 2.865 GHz + 2,865,087,008 instructions # 1.48 insn per cycle + 0.732940290 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault - 2,962,274,839 cycles:u # 2.921 GHz (74.51%) - 16,310,389 stalled-cycles-frontend:u # 0.55% frontend cycles idle (73.09%) - 834,349,457 stalled-cycles-backend:u # 28.17% backend cycles idle (74.26%) - 3,234,168,346 instructions:u # 1.09 insn per cycle - # 0.26 stalled cycles per insn (75.27%) - 1.049424411 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.036027e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.082450e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.082450e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.609941e+02 +- 2.115589e+02 ) GeV^-2 +TOTAL : 0.632974 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,495,936,905 cycles # 2.880 GHz + 
3,785,157,902 instructions # 1.52 insn per cycle + 0.923834641 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Memory access fault by GPU node-4 (Agent handle: 0x6922280) on address 0x15252d97f000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x1527c47332e2 in ??? -#1 0x1527c4732475 in ??? -#2 0x1527c2a58dbf in ??? -#3 0x1527c2a58d2b in ??? -#4 0x1527c2a5a3e4 in ??? -#5 0x1527ba3d4d1b in ??? -#6 0x1527ba3cebc8 in ??? -#7 0x1527ba3809e6 in ??? -#8 0x1527ba34c6e9 in ??? -#9 0x1527c2b2650e in ??? -#10 0xffffffffffffffff in ??? -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = -ERROR! 
Fortran calculation (F77/GPU) crashed +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247487904286338 +Relative difference = 0.0003670698531228044 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.655802e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.695047e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.695047e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945526e+02 +- 1.186197e+02 ) GeV^-2 -TOTAL : 1.013248 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,548,437,994 cycles:u # 3.492 GHz (74.88%) - 1,771,213 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.82%) - 400,864,942 stalled-cycles-backend:u # 11.30% backend cycles idle (74.81%) - 12,902,940,859 instructions:u # 3.64 insn per cycle - # 0.03 stalled cycles per insn (74.81%) - 1.021300390 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.104272e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.129547e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.129547e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.505088 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,422,322,267 cycles # 2.932 GHz + 12,955,751,055 instructions # 2.93 insn per cycle + 1.509164533 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -108,36 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246858320096933 -Relative difference = 1.1791391693704193e-07 +Avg ME (F77/C++) = 0.14246861273719524 +Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.234861e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.501310e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.501310e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 -TOTAL : 0.411015 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,439,279,123 cycles:u # 3.477 GHz (75.19%) - 1,815,921 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.90%) - 488,545,054 stalled-cycles-backend:u # 33.94% backend cycles idle (74.89%) - 4,324,704,827 instructions:u # 3.00 insn per cycle - # 0.11 stalled cycles per insn (74.89%) - 0.418480139 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3392) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.849156e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.028095e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.028095e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.596878 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,746,639,368 cycles # 2.911 GHz + 4,590,056,426 instructions # 2.63 insn per cycle + 0.600772729 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -145,36 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424687e-01 -Avg ME (F77/C++) = 0.14246865423667998 -Relative difference = 3.2121666037785094e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246862329122401 +Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.908700e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.864660e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.864660e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947131e+02 +- 1.186881e+02 ) GeV^-2 -TOTAL : 0.231399 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 800,159,625 cycles:u # 3.414 GHz (73.79%) - 1,927,070 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.48%) - 228,243,229 stalled-cycles-backend:u # 28.52% backend cycles idle (76.12%) - 1,884,780,194 instructions:u # 2.36 insn per cycle - # 0.12 stalled cycles per insn (76.12%) - 0.239026086 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3488) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.482456e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.156915e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.156915e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.321160 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 875,522,703 cycles # 2.698 GHz + 1,954,476,479 instructions # 2.23 insn per cycle + 0.325091323 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -182,16 +203,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490118064832 -Relative difference = 8.286711056488833e-09 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no 
avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.960780e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.751467e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.751467e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.296758 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 821,090,022 cycles # 2.738 GHz + 1,871,468,752 instructions # 2.28 insn per cycle + 0.300472695 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3402) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.492830e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.932362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.932362e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.388889 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 746,594,357 cycles # 1.904 GHz + 1,349,630,324 instructions # 1.81 insn per cycle + 0.392744433 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1963) (512y: 26) (512z: 2434) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491576758442 +Relative difference = 1.1066920862943416e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index dd554c2ed9..e2521e45b2 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,96 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-17_09:05:04 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:19:41 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault - 774,187,019 cycles:u # 2.388 GHz (76.04%) - 2,413,220 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.61%) - 11,440,480 stalled-cycles-backend:u # 1.48% backend cycles idle (76.09%) - 1,210,178,313 instructions:u # 1.56 insn per cycle - # 0.01 stalled cycles per insn (77.67%) - 0.360281997 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.639867e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.865475e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.015127e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.454979 sec +INFO: No Floating Point Exceptions have been reported + 1,901,448,560 cycles # 2.824 GHz + 2,678,183,164 instructions # 1.41 insn per cycle + 0.730072764 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault - 932,202,166 cycles:u # 2.345 GHz (75.92%) - 2,432,651 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.90%) - 7,458,416 stalled-cycles-backend:u # 0.80% backend cycles idle (74.38%) - 1,482,506,978 instructions:u # 1.59 insn per cycle - # 0.01 stalled cycles per insn (74.88%) - 0.433805412 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.246644e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.993104e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.325490e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 +TOTAL : 0.496812 sec +INFO: No Floating Point Exceptions have been reported + 2,083,867,357 cycles # 2.832 GHz + 2,934,470,565 instructions # 1.41 insn per cycle + 0.792648211 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Memory access fault by GPU node-4 (Agent handle: 0x69221d0) on address 0x14bbc70ef000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x14be5e40f2e2 in ??? -#1 0x14be5e40e475 in ??? -#2 0x14be5c736dbf in ??? -#3 0x14be5c736d2b in ??? -#4 0x14be5c7383e4 in ??? -#5 0x14be540b2d1b in ??? -#6 0x14be540acbc8 in ??? -#7 0x14be5405e9e6 in ??? -#8 0x14be5402a6e9 in ??? -#9 0x14be5c80450e in ??? -#10 0xffffffffffffffff in ??? -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = -ERROR! 
Fortran calculation (F77/GPU) crashed +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247487904286338 +Relative difference = 0.0003670698531228044 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.634348e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.672469e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.672469e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945526e+02 +- 1.186197e+02 ) GeV^-2 -TOTAL : 1.023951 sec -INFO: No Floating Point Exceptions have been reported - 3,587,561,145 cycles:u # 3.494 GHz (74.91%) - 1,716,315 stalled-cycles-frontend:u # 0.05% 
frontend cycles idle (75.08%) - 525,478,997 stalled-cycles-backend:u # 14.65% backend cycles idle (75.08%) - 12,865,660,946 instructions:u # 3.59 insn per cycle - # 0.04 stalled cycles per insn (75.08%) - 1.031366182 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 718) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.078136e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.103342e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103342e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.538413 sec +INFO: No Floating Point Exceptions have been reported + 4,411,092,348 cycles # 2.861 GHz + 12,926,836,759 instructions # 2.93 insn per cycle + 1.542610115 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 630) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -98,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246858320096933 -Relative difference = 1.1791391693704193e-07 +Avg ME (F77/C++) = 0.14246861273719524 +Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.201846e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.463974e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.463974e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 -TOTAL : 0.411230 sec -INFO: No Floating Point Exceptions have been reported - 1,441,859,421 cycles:u # 3.483 GHz (75.17%) - 1,803,436 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.89%) - 498,382,432 stalled-cycles-backend:u # 34.57% backend cycles idle (74.89%) - 4,298,169,788 instructions:u # 2.98 insn per cycle - # 0.12 stalled cycles per insn (74.89%) - 0.418909966 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3379) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.816554e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.994652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.994652e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.599654 sec +INFO: No Floating Point Exceptions have been reported + 1,728,903,265 cycles # 2.870 GHz + 4,536,279,042 instructions # 2.62 insn per cycle + 0.603646034 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -133,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424687e-01 -Avg ME (F77/C++) = 0.14246865423667998 -Relative difference = 3.2121666037785094e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246862329122401 +Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.994732e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.969698e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.969698e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947131e+02 +- 1.186881e+02 ) GeV^-2 -TOTAL : 0.226252 sec -INFO: No Floating Point Exceptions have been reported - 793,800,983 cycles:u # 3.465 GHz (74.01%) - 1,898,557 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.33%) - 246,057,450 stalled-cycles-backend:u # 31.00% backend cycles idle (75.57%) - 1,851,155,291 instructions:u # 2.33 insn per cycle - # 0.13 stalled cycles per insn (75.57%) - 0.233973767 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3463) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.299874e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.938357e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.938357e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.327791 sec +INFO: No Floating Point Exceptions have been reported + 861,849,665 cycles # 2.602 GHz + 1,914,633,101 instructions # 2.22 insn per cycle + 0.331876637 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3550) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -168,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490118064832 -Relative difference = 8.286711056488833e-09 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.927651e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.715679e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.715679e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.294357 sec +INFO: No Floating Point Exceptions have been reported + 802,533,600 cycles # 2.696 GHz + 1,830,391,280 instructions # 2.28 insn per cycle + 0.298329557 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3366) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.433633e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.866083e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.866083e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.389558 sec +INFO: No Floating Point Exceptions have been reported + 729,078,705 cycles # 1.856 GHz + 1,305,984,013 instructions # 1.79 insn per cycle + 0.393475655 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1926) (512y: 26) (512z: 2437) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491576758442 +Relative difference = 1.1066920862943416e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 8fc7eb3cac..75ffaff930 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,96 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-17_09:05:10 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:19:53 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault - 735,623,306 cycles:u # 2.239 GHz (75.11%) - 2,161,260 stalled-cycles-frontend:u # 0.29% frontend cycles idle (78.18%) - 5,857,261 stalled-cycles-backend:u # 0.80% backend cycles idle (78.70%) - 1,335,251,931 instructions:u # 1.82 insn per cycle - # 0.00 stalled cycles per insn (75.38%) - 0.376317506 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.751782e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.854275e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.972003e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.462457 sec +INFO: No Floating Point Exceptions have been reported + 1,917,992,386 cycles # 2.823 GHz + 2,716,857,811 instructions # 1.42 insn per cycle + 0.737791701 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault - 1,000,994,696 cycles:u # 2.322 GHz (73.14%) - 2,515,638 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.90%) - 5,899,608 stalled-cycles-backend:u # 0.59% backend cycles idle (75.34%) - 1,495,642,750 instructions:u # 1.49 insn per cycle - # 0.00 stalled cycles per insn (75.80%) - 0.465225739 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.933049e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.480174e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.706062e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.542910 sec +INFO: No Floating Point Exceptions have been reported + 2,260,071,894 cycles # 2.877 GHz + 3,201,078,521 instructions # 1.42 insn per cycle + 0.842582370 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 -Memory access fault by GPU node-4 (Agent handle: 0x6923260) on address 0x153861935000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x153af8c3c2e2 in ??? -#1 0x153af8c3b475 in ??? -#2 0x153af6f5fdbf in ??? -#3 0x153af6f5fd2b in ??? -#4 0x153af6f613e4 in ??? -#5 0x153aee8dbd1b in ??? -#6 0x153aee8d5bc8 in ??? -#7 0x153aee8879e6 in ??? -#8 0x153aee8536e9 in ??? -#9 0x153af702d50e in ??? -#10 0xffffffffffffffff in ??? -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = -ERROR! 
Fortran calculation (F77/GPU) crashed +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.464853e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.494590e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.494590e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.142225 sec -INFO: No Floating Point Exceptions have been reported - 3,979,991,372 cycles:u # 3.475 GHz (74.86%) - 2,224,529 stalled-cycles-frontend:u # 0.06% 
frontend cycles idle (74.86%) - 513,190,678 stalled-cycles-backend:u # 12.89% backend cycles idle (74.86%) - 13,136,401,110 instructions:u # 3.30 insn per cycle - # 0.04 stalled cycles per insn (74.90%) - 1.149929388 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 706) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.028418e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.050644e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.050644e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.613813 sec +INFO: No Floating Point Exceptions have been reported + 4,647,383,877 cycles # 2.879 GHz + 13,178,063,049 instructions # 2.84 insn per cycle + 1.618051260 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 681) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -98,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.523085e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.610528e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.610528e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.672914 sec -INFO: No Floating Point Exceptions have been reported - 2,352,367,992 cycles:u # 3.481 GHz (75.15%) - 2,123,517 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.15%) - 553,046,992 stalled-cycles-backend:u # 23.51% backend cycles idle (75.14%) - 7,445,456,101 instructions:u # 3.17 insn per cycle - # 0.07 stalled cycles per insn (75.15%) - 0.680327478 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3106) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.864504e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934483e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934483e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.898369 sec +INFO: No Floating Point Exceptions have been reported + 2,648,200,185 cycles # 2.937 GHz + 7,475,755,342 instructions # 2.82 insn per cycle + 0.902206814 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -133,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.840813e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.178977e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.178977e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.362116 sec -INFO: No Floating Point Exceptions have been reported - 1,267,114,391 cycles:u # 3.471 GHz (73.89%) - 1,905,595 stalled-cycles-frontend:u # 0.15% frontend cycles idle (73.50%) - 357,119,636 stalled-cycles-backend:u # 28.18% backend cycles idle (74.59%) - 3,033,671,708 instructions:u # 2.39 insn per cycle - # 0.12 stalled cycles per insn (75.91%) - 0.369300723 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3023) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.200611e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.408501e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.408501e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.532171 sec +INFO: No Floating Point Exceptions have been reported + 1,476,374,652 cycles # 2.757 GHz + 3,128,702,616 instructions # 2.12 insn per cycle + 0.536024340 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3131) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -168,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.587903e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.854303e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.854303e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.476990 sec +INFO: No Floating Point Exceptions have been reported + 1,322,669,287 cycles # 2.754 GHz + 2,982,885,294 instructions # 2.26 insn per cycle + 0.480825528 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2893) (512y: 110) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.251912e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.353383e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.353383e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.750102 sec +INFO: No Floating Point Exceptions have been reported + 1,363,693,421 cycles # 1.811 GHz + 1,991,339,845 instructions # 1.46 insn per cycle + 0.753947194 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1679) (512y: 108) (512z: 2252) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index d9f9231e44..40582e53fc 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,96 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-17_09:05:16 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_11:20:07 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault - 749,952,228 cycles:u # 2.279 GHz (74.79%) - 2,234,018 stalled-cycles-frontend:u # 0.30% frontend cycles idle (76.84%) - 11,724,657 stalled-cycles-backend:u # 1.56% backend cycles idle (74.75%) - 1,418,658,704 instructions:u # 1.89 insn per cycle - # 0.01 stalled cycles per insn (70.47%) - 0.368458862 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.750483e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.807638e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.927006e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.460124 sec +INFO: No Floating Point Exceptions have been reported + 1,938,834,678 cycles # 2.862 GHz + 2,712,058,421 instructions # 1.40 insn per cycle + 0.735469517 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault - 989,017,877 cycles:u # 2.307 GHz (73.51%) - 2,484,396 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.78%) - 5,232,865 stalled-cycles-backend:u # 0.53% backend cycles idle (75.77%) - 1,489,322,805 instructions:u # 1.51 insn per cycle - # 0.00 stalled cycles per insn (75.07%) - 0.516106931 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.925117e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.366077e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.584033e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.544645 sec +INFO: No Floating Point Exceptions have been reported + 2,249,855,314 cycles # 2.863 GHz + 3,222,814,057 instructions # 1.43 insn per cycle + 0.844200129 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 -Memory access fault by GPU node-4 (Agent handle: 0x69231b0) on address 0x15424d2a4000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x1544e438e2e2 in ??? -#1 0x1544e438d475 in ??? -#2 0x1544e26b2dbf in ??? -#3 0x1544e26b2d2b in ??? -#4 0x1544e26b43e4 in ??? -#5 0x1544da02ed1b in ??? -#6 0x1544da028bc8 in ??? -#7 0x1544d9fda9e6 in ??? -#8 0x1544d9fa66e9 in ??? -#9 0x1544e278050e in ??? -#10 0xffffffffffffffff in ??? -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = -ERROR! 
Fortran calculation (F77/GPU) crashed +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.460795e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.490218e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.490218e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.145702 sec -INFO: No Floating Point Exceptions have been reported - 4,008,278,939 cycles:u # 3.490 GHz (75.15%) - 2,042,355 stalled-cycles-frontend:u # 0.05% 
frontend cycles idle (74.95%) - 713,021,142 stalled-cycles-backend:u # 17.79% backend cycles idle (74.93%) - 13,135,943,398 instructions:u # 3.28 insn per cycle - # 0.05 stalled cycles per insn (74.93%) - 1.153070404 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 697) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.054095e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.076295e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.076295e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.574055 sec +INFO: No Floating Point Exceptions have been reported + 4,641,890,435 cycles # 2.943 GHz + 13,165,898,661 instructions # 2.84 insn per cycle + 1.578249512 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -98,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.419703e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.500272e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.500272e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.700641 sec -INFO: No Floating Point Exceptions have been reported - 2,440,009,345 cycles:u # 3.468 GHz (75.00%) - 1,871,517 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.99%) - 628,564,983 stalled-cycles-backend:u # 25.76% backend cycles idle (74.99%) - 7,437,101,160 instructions:u # 3.05 insn per cycle - # 0.08 stalled cycles per insn (74.99%) - 0.708195858 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3097) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.867370e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.936884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.936884e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.896912 sec +INFO: No Floating Point Exceptions have been reported + 2,636,737,245 cycles # 2.930 GHz + 7,477,755,477 instructions # 2.84 insn per cycle + 0.900719288 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3142) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -133,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.814876e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.149180e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.149180e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.363790 sec -INFO: No Floating Point Exceptions have been reported - 1,273,036,982 cycles:u # 3.472 GHz (74.35%) - 2,456,139 stalled-cycles-frontend:u # 0.19% frontend cycles idle (73.87%) - 263,152,821 stalled-cycles-backend:u # 20.67% backend cycles idle (73.96%) - 3,058,369,150 instructions:u # 2.40 insn per cycle - # 0.09 stalled cycles per insn (74.83%) - 0.370999366 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3001) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.202775e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.410191e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.410191e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.531412 sec +INFO: No Floating Point Exceptions have been reported + 1,468,072,782 cycles # 2.747 GHz + 3,129,202,339 instructions # 2.13 insn per cycle + 0.535248151 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3109) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -168,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.576512e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.841608e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.841608e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.477966 sec +INFO: No Floating Point Exceptions have been reported + 1,324,577,804 cycles # 2.753 GHz + 2,983,698,636 instructions # 2.25 insn per cycle + 0.481692847 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2869) (512y: 110) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.229034e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.331255e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.331255e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.757523 sec +INFO: No Floating Point Exceptions have been reported + 1,366,953,688 cycles # 1.797 GHz + 1,991,556,146 instructions # 1.46 insn per cycle + 0.761326972 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1655) (512y: 108) (512z: 2252) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index e8f148c35c..a10430f205 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-17_09:26:00 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:19:52 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.222962e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.849418e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.427313e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.531972 sec +INFO: No Floating Point Exceptions have been reported + 2,207,295,929 cycles # 2.875 GHz + 3,148,652,719 instructions # 1.43 insn per cycle + 0.824191400 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313472e+00 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': 
~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.605446e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.642912e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.642912e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.682964 sec +INFO: No Floating Point Exceptions have been reported + 19,598,347,374 cycles # 2.930 GHz + 52,065,080,941 instructions # 2.66 insn per cycle + 6.694844262 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926105795 +Relative difference = 2.1036172727915933e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.916629e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.051442e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.051442e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.747547 sec +INFO: No Floating Point Exceptions have been reported + 11,065,354,139 cycles # 2.943 GHz + 30,912,254,749 instructions # 2.79 insn per cycle + 3.760361851 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2914) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926105795 +Relative difference = 2.1036172727915933e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.668387e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.008748e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.008748e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.399916 sec +INFO: No Floating Point Exceptions have been reported + 6,627,221,489 cycles # 2.749 GHz + 13,792,796,598 instructions # 2.08 insn per cycle + 2.412653295 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2941) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 897,787,561 cycles:u # 0.606 GHz (74.82%) - 2,322,322 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.91%) - 12,676,754 
stalled-cycles-backend:u # 1.41% backend cycles idle (74.84%) - 1,488,185,060 instructions:u # 1.66 insn per cycle - # 0.01 stalled cycles per insn (75.24%) - 1.536285594 seconds time elapsed +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.129922e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.540308e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.540308e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.198225 sec +INFO: No Floating Point Exceptions have been reported + 6,104,431,058 cycles # 2.762 GHz + 13,134,794,290 instructions # 2.15 insn per cycle + 2.210920696 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2667) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 4.3134710926110271 - File "", line 1 - me1=; me2=4.3134710926110271; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.449384e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.629220e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.629220e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.193674 sec +INFO: No Floating Point Exceptions have been reported + 5,993,463,965 cycles # 1.870 GHz + 8,712,960,993 instructions # 1.45 insn per cycle + 3.206184057 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1506) (512y: 128) (512z: 1943) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index 960adafc8d..01ceafd1da 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-17_09:26:03 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:20:19 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.181438e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.797209e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.367517e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.531302 sec +INFO: No Floating Point Exceptions have been reported + 2,216,417,295 cycles # 2.883 GHz + 3,137,968,070 instructions # 1.42 insn per cycle + 0.825226040 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313472e+00 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.706608e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.748828e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.748828e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.299466 sec +INFO: No Floating Point Exceptions have been reported + 18,540,883,021 cycles # 2.938 GHz 
+ 50,178,474,604 instructions # 2.71 insn per cycle + 6.311951743 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 626) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926105795 +Relative difference = 2.1036172727915933e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] 
(23) = ( 3.062664e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.211274e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.211274e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.576786 sec +INFO: No Floating Point Exceptions have been reported + 10,549,321,378 cycles # 2.940 GHz + 29,289,408,214 instructions # 2.78 insn per cycle + 3.589213709 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2732) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926105795 +Relative difference = 2.1036172727915933e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.340015e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.632096e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.632096e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.568858 sec +INFO: No Floating Point Exceptions have been reported + 7,118,801,409 cycles # 2.759 GHz + 15,276,261,936 instructions # 2.15 insn per cycle + 2.581007821 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3021) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception - 908,292,704 cycles:u # 2.400 GHz (73.50%) - 2,392,771 stalled-cycles-frontend:u # 0.26% frontend cycles idle (73.74%) - 7,118,916 stalled-cycles-backend:u # 0.78% backend cycles idle (74.74%) - 1,496,479,891 instructions:u # 1.65 insn per cycle - # 0.00 stalled cycles per insn (75.69%) - 0.415323394 seconds time elapsed +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.507726e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.822175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.822175e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.479110 sec +INFO: No Floating Point Exceptions have been reported + 6,890,334,799 cycles # 2.767 GHz + 14,747,969,860 instructions # 2.14 insn per cycle + 2.491499387 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2617) (512y: 302) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 4.3134710926110271 - File "", line 1 - me1=; me2=4.3134710926110271; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.315391e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.482899e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.482899e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.316209 sec +INFO: No Floating Point Exceptions have been reported + 6,207,380,257 cycles # 1.865 GHz + 10,464,609,822 instructions # 1.69 insn per cycle + 3.328585456 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 214) (512z: 2130) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index c6baeb710f..2ef1c54aa0 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,41 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-17_09:26:06 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:20:46 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 796,145,514 cycles:u # 2.320 GHz (74.49%) - 2,394,866 stalled-cycles-frontend:u # 0.30% frontend cycles idle (73.45%) - 12,718,740 stalled-cycles-backend:u # 1.60% backend cycles idle (74.52%) - 1,443,982,349 instructions:u # 1.81 insn per cycle - # 0.01 stalled cycles per insn (74.44%) - 0.380527612 seconds time elapsed +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.552559e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.511007e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.603394e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 +TOTAL : 0.491259 sec +INFO: No Floating Point Exceptions have been reported + 2,068,896,846 cycles # 2.881 GHz + 2,979,901,367 instructions # 1.44 insn per cycle + 0.776426531 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 131 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -43,11 +70,180 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 4.3135525361867622 - File "", line 1 - me1=; me2=4.3135525361867622; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695491848513 +Relative difference = 4.162503792787837e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.683914e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.725672e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.725672e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 6.333148 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 18,682,476,989 cycles # 2.947 GHz + 51,267,470,348 instructions # 2.74 insn per cycle + 6.341547157 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 625) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313574e+00 +Avg ME (F77/C++) = 4.3135738277342170 +Relative difference = 3.9935743068669333e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.015012e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.280109e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.280109e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.718050 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 
8,007,664,837 cycles # 2.940 GHz + 19,370,996,217 instructions # 2.42 insn per cycle + 2.726376718 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3542) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313572e+00 +Avg ME (F77/C++) = 4.3135722697479650 +Relative difference = 6.253470796314402e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.789023e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.799247e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.799247e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.452394 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 4,027,415,196 cycles # 2.759 GHz + 8,886,566,152 instructions # 2.21 insn per cycle + 1.460503609 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3715) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313565e+00 +Avg ME (F77/C++) = 4.3135645242873579 +Relative difference = 1.1028294269894893e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.322323e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.475598e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.475598e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.366660 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,807,036,059 cycles # 2.770 GHz + 8,489,981,547 instructions # 2.23 insn per cycle + 1.374788749 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3543) (512y: 20) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313565e+00 +Avg ME (F77/C++) = 4.3135645242873579 +Relative difference = 1.1028294269894893e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.974329e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.534282e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.534282e+05 ) sec^-1 +MeanMatrixElemValue 
= ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.862736 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,570,392,990 cycles # 1.910 GHz + 6,298,404,091 instructions # 1.76 insn per cycle + 1.870756064 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2323) (512y: 24) (512z: 2290) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313564e+00 +Avg ME (F77/C++) = 4.3135643536224961 +Relative difference = 8.197919301304478e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt 
index a95f0eb2dd..479ebdb204 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -1,41 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-17_09:26:08 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:21:08 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception - 832,179,582 cycles:u # 2.431 GHz (74.40%) - 2,456,119 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.73%) - 12,748,511 stalled-cycles-backend:u # 1.53% backend cycles idle (75.16%) - 1,457,243,610 instructions:u # 1.75 insn per cycle - # 0.01 stalled cycles per insn (75.09%) - 0.380517703 seconds time elapsed +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.776065e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.594605e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.702542e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 +TOTAL : 0.491621 sec +INFO: No Floating Point Exceptions have been reported + 2,069,264,305 cycles # 2.877 GHz + 2,928,235,838 instructions # 1.42 insn per cycle + 0.775773692 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -43,11 +70,184 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 4.3135525361867622 - File "", line 1 - me1=; me2=4.3135525361867622; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695491848513 +Relative difference = 4.162503792787837e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.731608e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.775696e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.775696e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 6.161667 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 18,113,353,892 cycles # 2.937 GHz + 49,656,566,510 instructions # 2.74 insn per cycle + 6.170127822 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 613) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313574e+00 +Avg ME (F77/C++) = 4.3135738277342170 +Relative difference = 3.9935743068669333e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.528214e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.868162e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.868162e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.421156 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 
7,159,836,216 cycles # 2.949 GHz + 18,538,672,579 instructions # 2.59 insn per cycle + 2.429136947 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3234) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313572e+00 +Avg ME (F77/C++) = 4.3135722697479650 +Relative difference = 6.253470796314402e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.353305e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.808520e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.808520e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 2.063917 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 5,718,285,058 cycles # 2.760 GHz + 10,903,070,951 instructions # 1.91 insn per cycle + 2.072527320 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4274) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313565e+00 +Avg ME (F77/C++) = 4.3135645242873579 +Relative difference = 1.1028294269894893e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.452070e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.924355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.924355e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 2.029021 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 
5,634,694,028 cycles # 2.767 GHz + 10,598,235,094 instructions # 1.88 insn per cycle + 2.037144953 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4135) (512y: 12) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313565e+00 +Avg ME (F77/C++) = 4.3135645242873579 +Relative difference = 1.1028294269894893e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.351507e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.637189e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.637189e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 2.516038 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 4,700,188,022 cycles # 1.863 GHz + 8,712,811,590 instructions # 1.85 insn per cycle + 2.524039667 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 0) (512z: 2885) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313564e+00 +Avg ME (F77/C++) = 4.3135643536224961 +Relative difference = 8.197919301304478e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 211ebcec94..7f1052231e 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,41 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 
1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-17_09:26:10 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:21:31 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception - 889,655,717 cycles:u # 2.333 GHz (74.90%) - 2,361,297 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.89%) - 6,408,772 stalled-cycles-backend:u # 0.72% backend cycles idle (73.67%) - 1,471,162,588 instructions:u # 1.65 insn per cycle - # 0.00 stalled cycles per insn (75.19%) - 0.416404111 seconds time elapsed +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.233444e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.828530e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
9.380985e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.531166 sec +INFO: No Floating Point Exceptions have been reported + 2,204,089,924 cycles # 2.871 GHz + 3,157,003,197 instructions # 1.43 insn per cycle + 0.824717149 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -43,11 +70,184 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 - File "", line 1 - me1=; me2=4.3134711012809239; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +Relative difference = 2.0835166567625394e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
+Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.513438e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.546541e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.546541e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 7.083035 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 20,836,157,287 cycles # 2.937 GHz + 52,059,859,689 instructions # 2.50 insn per cycle + 7.095325403 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134711778082178 +Relative difference = 1.906102050071626e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.708543e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.825114e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.825114e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 4.025851 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 
11,676,241,949 cycles # 2.892 GHz + 30,719,909,890 instructions # 2.63 insn per cycle + 4.038601753 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2971) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134711778082178 +Relative difference = 1.906102050071626e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.506319e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.824086e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.824086e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.479724 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,856,020,570 cycles # 2.752 GHz + 13,733,686,621 instructions # 2.00 insn per cycle + 2.492002268 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3118) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.929662e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.305854e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.305854e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.278940 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 
6,324,440,516 cycles # 2.761 GHz + 13,099,663,654 instructions # 2.07 insn per cycle + 2.291244442 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2851) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.121063e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.267193e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.267193e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.513123 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,549,229,378 cycles # 1.858 GHz + 8,826,958,587 instructions # 1.35 insn per cycle + 3.525479379 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1792) (512y: 130) (512z: 2013) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index 27caaa9b9d..b5ff528c40 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -1,41 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-17_09:26:13 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:21:59 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception - 903,069,189 cycles:u # 2.385 GHz (73.83%) - 2,287,403 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.71%) - 7,735,428 stalled-cycles-backend:u # 0.86% backend cycles idle (75.40%) - 1,453,998,206 instructions:u # 1.61 insn per cycle - # 0.01 stalled cycles per insn (75.53%) - 0.415028548 seconds time elapsed +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.263927e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.696044e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
9.276288e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.532325 sec +INFO: No Floating Point Exceptions have been reported + 2,205,625,214 cycles # 2.868 GHz + 3,174,502,414 instructions # 1.44 insn per cycle + 0.825987629 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -43,11 +70,184 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 - File "", line 1 - me1=; me2=4.3134711012809239; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +Relative difference = 2.0835166567625394e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
+Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.606903e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.644535e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.644535e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.677680 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 19,666,494,079 cycles # 2.940 GHz + 50,081,060,677 instructions # 2.55 insn per cycle + 6.689882991 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 599) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134711778082178 +Relative difference = 1.906102050071626e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.871340e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.003903e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.003903e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.840726 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 
11,259,304,877 cycles # 2.923 GHz + 29,230,934,183 instructions # 2.60 insn per cycle + 3.852980170 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2807) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134711778082178 +Relative difference = 1.906102050071626e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.726077e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.943109e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.943109e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.970419 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 8,229,432,856 cycles # 2.759 GHz + 15,297,097,015 instructions # 1.86 insn per cycle + 2.983820409 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3202) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.908170e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.147639e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.147639e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.839910 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 
7,866,112,540 cycles # 2.759 GHz + 14,608,431,526 instructions # 1.86 insn per cycle + 2.852893659 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2774) (512y: 304) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.030076e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.169205e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.169205e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.616163 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,699,969,895 cycles # 1.847 GHz + 10,018,865,936 instructions # 1.50 insn per cycle + 3.629335211 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1565) (512y: 216) (512z: 2217) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 8f00e506d3..7707f676a6 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-17_09:25:40 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:18:46 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 954,974,630 cycles:u # 0.908 GHz (75.47%) - 2,343,671 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.33%) - 8,919,960 stalled-cycles-backend:u # 0.93% backend cycles idle (75.25%) - 1,500,525,229 instructions:u # 1.57 insn per cycle - # 0.01 stalled cycles per insn (74.16%) - 1.096050604 seconds time elapsed +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.767516e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
2.784818e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.787795e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.470421 sec +INFO: No Floating Point Exceptions have been reported + 1,978,272,924 cycles # 2.864 GHz + 2,912,164,766 instructions # 1.47 insn per cycle + 0.749211691 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.005244e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.117313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.126114e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.483026 sec +INFO: No Floating Point Exceptions have been reported + 2,024,260,948 cycles # 2.878 GHz + 3,029,497,927 instructions # 1.50 insn per cycle + 0.762830166 seconds time elapsed +------------------------------------------------------------------------- +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127459e-06 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 1,161,046,200 cycles:u # 
2.722 GHz (75.01%) - 2,328,012 stalled-cycles-frontend:u # 0.20% frontend cycles idle (73.94%) - 11,393,051 stalled-cycles-backend:u # 0.98% backend cycles idle (73.28%) - 1,722,237,964 instructions:u # 1.48 insn per cycle - # 0.01 stalled cycles per insn (72.94%) - 0.464357458 seconds time elapsed +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 3.405701e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.409074e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.409074e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.157165 sec +INFO: No Floating Point Exceptions have been reported + 467,074,127 cycles # 2.919 GHz + 1,389,682,298 instructions # 2.98 insn per cycle + 0.160520641 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3908) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 8.1274562860176587E-006 - File "", line 1 - me1=; me2=8.1274562860176587E-006; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860167185E-006 +Relative difference = 3.339276495559746e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.459230e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.470849e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.470849e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.083614 sec +INFO: No Floating Point Exceptions have been reported + 239,038,405 cycles # 2.765 GHz + 692,921,675 instructions # 2.90 insn per cycle + 0.087016440 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9483) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860167168E-006 +Relative difference = 3.3392764976441195e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.419984e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.425694e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.425694e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.039288 sec +INFO: No Floating Point Exceptions have been reported + 113,366,397 cycles # 2.696 GHz + 257,996,166 instructions # 2.28 insn per cycle + 
0.042698199 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8503) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.624961e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.632288e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.632288e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.034517 sec +INFO: No Floating Point Exceptions have been reported + 101,263,068 cycles # 2.711 GHz + 239,969,377 instructions # 2.37 insn per cycle + 0.037861089 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8140) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.199166e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.204857e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204857e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.046301 sec +INFO: No Floating Point Exceptions have been reported + 89,031,390 cycles # 1.806 GHz + 134,346,666 instructions # 1.51 insn per cycle + 0.049794003 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1943) (512y: 126) (512z: 7090) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index e4bab9232e..ca3a407fd8 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-17_09:25:44 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:18:57 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception - 952,666,848 cycles:u # 2.620 GHz (75.10%) - 2,413,575 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.93%) - 5,955,743 stalled-cycles-backend:u # 0.63% backend cycles idle (75.86%) - 1,502,436,811 instructions:u # 1.58 insn per cycle - # 0.00 stalled cycles per insn (73.71%) - 0.401055061 seconds time elapsed +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.802842e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
2.821481e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.824642e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.470683 sec +INFO: No Floating Point Exceptions have been reported + 1,990,204,132 cycles # 2.870 GHz + 2,908,985,105 instructions # 1.46 insn per cycle + 0.750697991 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.083966e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.205394e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.213656e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.487582 sec +INFO: No Floating Point Exceptions have been reported + 2,025,056,560 cycles # 2.852 GHz + 2,989,421,142 instructions # 1.48 insn per cycle + 0.769117174 seconds time elapsed +------------------------------------------------------------------------- +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127459e-06 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception - 1,161,301,632 cycles:u # 
2.736 GHz (75.44%) - 2,522,146 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.76%) - 5,014,212 stalled-cycles-backend:u # 0.43% backend cycles idle (73.83%) - 1,663,926,319 instructions:u # 1.43 insn per cycle - # 0.00 stalled cycles per insn (73.66%) - 0.461818006 seconds time elapsed +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 3.394566e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.397741e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.397741e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.157033 sec +INFO: No Floating Point Exceptions have been reported + 465,720,728 cycles # 2.911 GHz + 1,385,003,144 instructions # 2.97 insn per cycle + 0.160593741 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3796) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 8.1274562860176587E-006 - File "", line 1 - me1=; me2=8.1274562860176587E-006; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860167185E-006 +Relative difference = 3.339276495559746e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.474186e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.485931e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.485931e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.082867 sec +INFO: No Floating Point Exceptions have been reported + 237,575,401 cycles # 2.770 GHz + 689,116,420 instructions # 2.90 insn per cycle + 0.086305788 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9528) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860167168E-006 +Relative difference = 3.3392764976441195e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.436754e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.442531e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.442531e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.038039 sec +INFO: No Floating Point Exceptions have been reported + 110,520,646 cycles # 2.700 GHz + 253,448,082 instructions # 2.29 insn per cycle + 
0.041474271 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8458) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.611056e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.618327e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.618327e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.034052 sec +INFO: No Floating Point Exceptions have been reported + 98,863,837 cycles # 2.687 GHz + 235,605,174 instructions # 2.38 insn per cycle + 0.037353270 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8098) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.176506e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181658e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.181658e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.046442 sec +INFO: No Floating Point Exceptions have been reported + 86,647,290 cycles # 1.764 GHz + 129,720,267 instructions # 1.50 insn per cycle + 0.049837932 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1899) (512y: 126) (512z: 7094) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 66c4896a7e..0df257cc6a 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-17_09:25:47 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:19:09 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 893,896,815 cycles:u # 2.583 GHz (74.74%) - 2,477,469 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.82%) - 5,175,634 stalled-cycles-backend:u # 0.58% backend cycles idle (75.94%) - 1,396,921,239 instructions:u # 1.56 insn per cycle - # 0.00 stalled cycles per insn (75.93%) - 0.384874288 seconds time elapsed +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.204232e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
2.214249e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.216451e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 +TOTAL : 0.474860 sec +INFO: No Floating Point Exceptions have been reported + 1,958,003,333 cycles # 2.836 GHz + 2,859,472,548 instructions # 1.46 insn per cycle + 0.747968607 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.933159e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.016706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.024424e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.020494e-03 +- 4.025605e-03 ) GeV^-4 +TOTAL : 0.477251 sec +INFO: No Floating Point Exceptions have been reported + 1,992,584,596 cycles # 2.867 GHz + 2,884,692,368 instructions # 1.45 insn per cycle + 0.751767434 seconds time elapsed +------------------------------------------------------------------------- +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272869669930272E-006 +Relative difference = 4.548524165778887e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 1,046,849,130 cycles:u # 
2.701 GHz (76.03%) - 2,443,461 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.54%) - 5,320,923 stalled-cycles-backend:u # 0.51% backend cycles idle (74.83%) - 1,524,354,123 instructions:u # 1.46 insn per cycle - # 0.00 stalled cycles per insn (75.34%) - 0.421507472 seconds time elapsed +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 3.431583e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.434922e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.434922e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.155945 sec +INFO: No Floating Point Exceptions have been reported + 462,855,819 cycles # 2.915 GHz + 1,381,844,785 instructions # 2.99 insn per cycle + 0.159290331 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 8.1275160277913510E-006 - File "", line 1 - me1=; me2=8.1275160277913510E-006; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127811e-06 +Avg ME (F77/C++) = 8.1278105271212486E-006 +Relative difference = 5.8180333155894157e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.210882e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.215211e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.215211e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.045512 sec +INFO: No Floating Point Exceptions have been reported + 131,360,157 cycles # 2.718 GHz + 372,013,509 instructions # 2.83 insn per cycle + 0.048801319 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:10141) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127809e-06 +Avg ME (F77/C++) = 8.1278090510674588E-006 +Relative difference = 6.2830535070193674e-09 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.769306e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.791871e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.791871e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.021120 sec +INFO: No Floating Point Exceptions have been reported + 64,157,831 cycles # 2.680 GHz + 142,829,765 instructions # 2.23 insn per cycle + 
0.024479209 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9251) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.078002e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.105354e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.105354e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.019156 sec +INFO: No Floating Point Exceptions have been reported + 59,143,033 cycles # 2.685 GHz + 132,774,537 instructions # 2.24 insn per cycle + 0.022562262 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8963) (512y: 28) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.363857e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.386270e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.386270e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 +TOTAL : 0.024531 sec +INFO: No Floating Point Exceptions have been reported + 51,349,038 cycles # 1.867 GHz + 79,557,658 instructions # 1.55 insn per cycle + 0.028087213 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2834) (512y: 32) (512z: 7442) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275369863475849E-006 +Relative difference = 1.6797726498700304e-09 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index e434d3365f..1f92901611 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-17_09:25:50 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:19:19 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception - 899,328,589 cycles:u # 2.601 GHz (74.71%) - 2,369,783 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.93%) - 6,083,724 stalled-cycles-backend:u # 0.68% backend cycles idle (76.87%) - 1,389,875,106 instructions:u # 1.55 insn per cycle - # 0.00 stalled cycles per insn (75.80%) - 0.384557491 seconds time elapsed +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.234747e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
2.244150e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.246143e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 +TOTAL : 0.473072 sec +INFO: No Floating Point Exceptions have been reported + 1,983,051,703 cycles # 2.868 GHz + 2,920,120,611 instructions # 1.47 insn per cycle + 0.748230768 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.099236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.191571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.199199e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.020496e-03 +- 4.025606e-03 ) GeV^-4 +TOTAL : 0.474597 sec +INFO: No Floating Point Exceptions have been reported + 1,988,967,454 cycles # 2.876 GHz + 2,944,964,203 instructions # 1.48 insn per cycle + 0.748107743 seconds time elapsed +------------------------------------------------------------------------- +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272866419447706E-006 +Relative difference = 4.508529302013153e-06 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception - 1,078,310,516 cycles:u # 
2.799 GHz (74.46%) - 2,431,227 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.29%) - 6,358,432 stalled-cycles-backend:u # 0.59% backend cycles idle (74.29%) - 1,506,219,935 instructions:u # 1.40 insn per cycle - # 0.00 stalled cycles per insn (75.15%) - 0.421863166 seconds time elapsed +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 3.448809e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.452114e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.452114e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.154406 sec +INFO: No Floating Point Exceptions have been reported + 460,841,033 cycles # 2.931 GHz + 1,376,637,796 instructions # 2.99 insn per cycle + 0.157690889 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2930) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 8.1275164883853706E-006 - File "", line 1 - me1=; me2=8.1275164883853706E-006; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127811e-06 +Avg ME (F77/C++) = 8.1278105271212486E-006 +Relative difference = 5.8180333155894157e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.217964e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.222354e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.222354e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.044518 sec +INFO: No Floating Point Exceptions have been reported + 129,447,390 cycles # 2.729 GHz + 367,192,934 instructions # 2.84 insn per cycle + 0.047990838 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:10124) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127809e-06 +Avg ME (F77/C++) = 8.1278090510674588E-006 +Relative difference = 6.2830535070193674e-09 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.769546e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.792490e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.792490e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.020349 sec +INFO: No Floating Point Exceptions have been reported + 62,145,033 cycles # 2.684 GHz + 138,048,264 instructions # 2.22 insn per cycle + 
0.023682982 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9205) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.058079e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.086570e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.086570e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.018477 sec +INFO: No Floating Point Exceptions have been reported + 56,677,502 cycles # 2.660 GHz + 127,963,925 instructions # 2.26 insn per cycle + 0.021825959 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8919) (512y: 28) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.337142e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.358958e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.358958e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 +TOTAL : 0.023953 sec +INFO: No Floating Point Exceptions have been reported + 48,824,483 cycles # 1.820 GHz + 74,785,723 instructions # 1.53 insn per cycle + 0.027430916 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2789) (512y: 32) (512z: 7444) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275369863475849E-006 +Relative difference = 1.6797726498700304e-09 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 67e8719f10..c9ae973486 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-17_09:25:53 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:19:30 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception - 956,981,603 cycles:u # 2.620 GHz (75.83%) - 2,517,542 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.97%) - 5,371,065 stalled-cycles-backend:u # 0.56% backend cycles idle (76.05%) - 1,451,394,083 instructions:u # 1.52 insn per cycle - # 0.00 stalled cycles per insn (75.46%) - 0.401330468 seconds time elapsed +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.749294e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
2.767595e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.770609e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.467392 sec +INFO: No Floating Point Exceptions have been reported + 1,983,595,553 cycles # 2.874 GHz + 2,922,486,219 instructions # 1.47 insn per cycle + 0.746529670 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.927630e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.040034e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.047831e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.485867 sec +INFO: No Floating Point Exceptions have been reported + 2,031,462,606 cycles # 2.875 GHz + 3,037,983,552 instructions # 1.50 insn per cycle + 0.765937206 seconds time elapsed +------------------------------------------------------------------------- +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127459e-06 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception - 1,169,981,271 cycles:u # 
2.750 GHz (75.55%) - 2,593,245 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.67%) - 6,057,770 stalled-cycles-backend:u # 0.52% backend cycles idle (75.69%) - 1,678,272,959 instructions:u # 1.43 insn per cycle - # 0.00 stalled cycles per insn (74.88%) - 0.461591805 seconds time elapsed +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 3.382949e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.386200e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.386200e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.158173 sec +INFO: No Floating Point Exceptions have been reported + 471,387,733 cycles # 2.929 GHz + 1,398,281,899 instructions # 2.97 insn per cycle + 0.161473463 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 8.1274562879405183E-006 - File "", line 1 - me1=; me2=8.1274562879405183E-006; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562948736117E-006 +Relative difference = 3.32837900190667e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.673807e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.686050e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.686050e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.080997 sec +INFO: No Floating Point Exceptions have been reported + 235,160,008 cycles # 2.808 GHz + 688,033,850 instructions # 2.93 insn per cycle + 0.084339129 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9328) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563175290919E-006 +Relative difference = 3.3005037703909805e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.415459e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.422136e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.422136e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.039362 sec +INFO: No Floating Point Exceptions have been reported + 112,339,380 cycles # 2.665 GHz + 253,052,093 instructions # 2.25 insn per cycle + 
0.042695307 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8363) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.648852e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.656658e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.656658e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.034079 sec +INFO: No Floating Point Exceptions have been reported + 100,217,114 cycles # 2.715 GHz + 233,607,212 instructions # 2.33 insn per cycle + 0.037476380 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7501) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.192314e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.197366e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.197366e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.046499 sec +INFO: No Floating Point Exceptions have been reported + 89,493,670 cycles # 1.812 GHz + 133,128,515 instructions # 1.49 insn per cycle + 0.049962595 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2084) (512y: 122) (512z: 6356) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index 98d29a7943..1d81f994cb 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,51 +1,117 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-17_09:25:57 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:19:41 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception - 944,021,735 cycles:u # 2.642 GHz (75.58%) - 2,477,735 stalled-cycles-frontend:u # 0.26% frontend cycles idle (72.94%) - 6,413,288 stalled-cycles-backend:u # 0.68% backend cycles idle (71.73%) - 1,495,341,386 instructions:u # 1.58 insn per cycle - # 0.00 stalled cycles per insn (74.19%) - 0.395987038 seconds time elapsed +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.765961e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
2.783708e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.789350e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.474700 sec +INFO: No Floating Point Exceptions have been reported + 1,974,719,372 cycles # 2.833 GHz + 2,899,642,626 instructions # 1.47 insn per cycle + 0.754986373 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.058757e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.171730e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.179415e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.484755 sec +INFO: No Floating Point Exceptions have been reported + 2,032,400,386 cycles # 2.878 GHz + 3,034,442,470 instructions # 1.49 insn per cycle + 0.765490241 seconds time elapsed +------------------------------------------------------------------------- +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127459e-06 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception - 1,167,852,448 cycles:u # 
2.766 GHz (75.01%) - 2,310,511 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.65%) - 5,844,521 stalled-cycles-backend:u # 0.50% backend cycles idle (75.26%) - 1,703,248,077 instructions:u # 1.46 insn per cycle - # 0.00 stalled cycles per insn (73.13%) - 0.458560013 seconds time elapsed +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 3.419840e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.423095e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.423095e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.155760 sec +INFO: No Floating Point Exceptions have been reported + 467,249,665 cycles # 2.946 GHz + 1,393,566,061 instructions # 2.98 insn per cycle + 0.159156822 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -53,11 +119,140 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 8.1274562879405183E-006 - File "", line 1 - me1=; me2=8.1274562879405183E-006; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562948736117E-006 +Relative difference = 3.32837900190667e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.647634e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.659890e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.659890e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.080628 sec +INFO: No Floating Point Exceptions have been reported + 234,377,416 cycles # 2.808 GHz + 684,139,763 instructions # 2.92 insn per cycle + 0.083918243 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9361) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563175290919E-006 +Relative difference = 3.3005037703909805e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.444361e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.450464e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.450464e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037847 sec +INFO: No Floating Point Exceptions have been reported + 110,057,998 cycles # 2.704 GHz + 248,602,467 instructions # 2.26 insn per cycle + 
0.041225455 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8316) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) 
+Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.658647e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.666343e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.666343e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.033122 sec +INFO: No Floating Point Exceptions have been reported + 97,824,445 cycles # 2.731 GHz + 229,151,030 instructions # 2.34 insn per cycle + 0.036353420 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7452) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.188861e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.193842e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.193842e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.046038 sec +INFO: No Floating Point Exceptions have been reported + 87,629,988 cycles # 1.795 GHz + 128,556,729 instructions # 1.47 insn per cycle + 0.049600721 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2035) (512y: 122) (512z: 6356) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 581385cc9d..808bf6828b 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,41 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-17_09:25:25 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:17:33 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 776,488,159 cycles:u # 0.534 GHz (74.80%) - 2,516,553 stalled-cycles-frontend:u # 0.32% frontend cycles idle (74.95%) - 8,869,181 stalled-cycles-backend:u # 1.14% backend cycles idle (75.12%) - 1,356,671,347 instructions:u # 1.75 insn per cycle - # 0.01 stalled cycles per insn (75.14%) - 1.507943316 seconds time elapsed +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.071674e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.333003e+08 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.756234e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.519163 sec +INFO: No Floating Point Exceptions have been reported + 2,179,162,165 cycles # 2.882 GHz + 3,070,881,799 instructions # 1.41 insn per cycle + 0.812256060 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -43,11 +70,174 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 - File "", line 1 - me1=; me2=0.14771956172964262; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +Relative difference = 2.590743366698123e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 8.736392e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.961242e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.961242e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.358463 sec +INFO: No Floating Point Exceptions have been reported + 3,905,907,731 cycles # 2.851 GHz + 9,863,781,254 instructions # 2.53 insn per cycle + 1.371009162 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.459599e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.873378e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.873378e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.877453 sec +INFO: No Floating Point Exceptions have been reported + 2,486,018,663 cycles # 2.796 GHz + 6,068,811,134 instructions # 2.44 insn per cycle + 0.890013058 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1369) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.202336e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.241982e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.241982e+06 ) 
sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.633614 sec +INFO: No Floating Point Exceptions have been reported + 1,818,277,006 cycles # 2.816 GHz + 3,450,832,845 instructions # 1.90 insn per cycle + 0.646259584 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1499) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.283565e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.391222e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.391222e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.617501 sec +INFO: No Floating Point Exceptions have been reported + 1,780,688,704 cycles # 2.829 GHz + 3,420,263,634 instructions # 1.92 insn per cycle + 0.630172459 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1375) (512y: 96) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.121818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.051040e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.051040e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.653754 sec +INFO: No Floating Point Exceptions have been reported + 1,527,075,900 cycles # 2.294 GHz + 2,560,289,188 instructions # 1.68 insn per cycle + 0.666212420 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 580) (512y: 60) (512z: 1021) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index e79f7c2a1d..06cbb3e926 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,41 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' 
HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-17_09:25:29 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:17:46 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception - 776,650,730 cycles:u # 2.240 GHz (74.89%) - 2,405,347 stalled-cycles-frontend:u # 0.31% frontend cycles idle (74.86%) - 5,861,901 stalled-cycles-backend:u # 0.75% backend cycles idle (74.11%) - 1,353,641,108 instructions:u # 1.74 insn per cycle - # 0.00 stalled cycles per insn (75.74%) - 0.383423982 seconds time elapsed +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.969739e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.449933e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.971179e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.523956 sec +INFO: No Floating Point Exceptions have been reported + 2,197,273,631 cycles # 2.867 GHz + 3,116,260,127 instructions # 1.42 insn per cycle + 0.822598423 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -43,11 +70,174 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 - File "", line 1 - me1=; me2=0.14771956172964262; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +Relative difference = 2.590743366698123e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 9.011162e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.031790e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.031790e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.321281 sec +INFO: No Floating Point Exceptions have been reported + 3,893,594,822 cycles # 2.920 GHz + 9,744,555,445 instructions # 2.50 insn per cycle + 1.334263922 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.381914e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.838565e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.838565e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.917902 sec +INFO: No Floating Point Exceptions have been reported + 2,659,815,270 cycles # 2.862 GHz + 6,026,660,919 instructions # 2.27 insn per cycle + 0.930359460 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1335) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.192597e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.249251e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.249251e+06 ) 
sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.636612 sec +INFO: No Floating Point Exceptions have been reported + 1,830,188,885 cycles # 2.821 GHz + 3,421,758,036 instructions # 1.87 insn per cycle + 0.649438298 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1436) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.271531e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.393290e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.393290e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.619901 sec +INFO: No Floating Point Exceptions have been reported + 1,782,873,131 cycles # 2.820 GHz + 3,395,941,059 instructions # 1.90 insn per cycle + 0.632832806 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1328) (512y: 96) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.134142e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.070820e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.070820e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.650853 sec +INFO: No Floating Point Exceptions have been reported + 1,547,796,135 cycles # 2.334 GHz + 2,545,431,106 instructions # 1.64 insn per cycle + 0.664117617 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 547) (512y: 60) (512z: 1007) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index aad2582b55..6bbc9fb0da 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' 
HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-17_09:25:31 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:17:58 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.000684e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.992629e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.388281e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 +TOTAL : 0.488697 sec +INFO: No Floating Point Exceptions have been reported + 2,047,674,909 cycles # 2.864 GHz + 2,921,802,724 instructions # 1.43 insn per cycle + 0.773667864 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 97 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.477195e-01 +Avg ME (F77/GPU) = 0.14771956735057756 +Relative difference = 4.559355911674916e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 9.029870e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.042886e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.042886e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 1.272870 sec +INFO: No Floating Point Exceptions have been reported + 3,752,313,957 cycles # 2.930 GHz + 9,659,106,684 instructions # 2.57 insn per cycle + 1.281538641 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956094773486 +Relative difference = 2.643675256627469e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.197021e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.333033e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.333033e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 0.588107 sec +INFO: No Floating Point Exceptions have been reported + 1,715,029,446 cycles # 2.877 GHz + 4,025,277,973 instructions # 2.35 insn per cycle + 0.596790312 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1579) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955861942843 +Relative difference = 2.80129187869649e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.961392e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.263984e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.263984e+06 ) 
sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.463961 sec +INFO: No Floating Point Exceptions have been reported + 1,335,854,072 cycles # 2.831 GHz + 2,555,445,671 instructions # 1.91 insn per cycle + 0.472547002 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1924) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955698961392 +Relative difference = 2.9116235141448046e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 744,091,384 cycles:u # 2.269 GHz (75.70%) - 2,340,999 stalled-cycles-frontend:u # 0.31% frontend cycles idle (74.58%) - 6,500,181 stalled-cycles-backend:u # 0.87% backend cycles idle (75.05%) - 1,351,248,193 instructions:u # 1.82 insn per cycle - # 0.00 stalled cycles per insn (75.50%) - 0.383825815 seconds time elapsed +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.064902e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.593039e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.593039e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.451960 sec +INFO: No Floating Point Exceptions have been reported + 1,308,601,918 cycles # 2.845 GHz + 2,529,434,362 instructions # 1.93 insn per cycle + 0.460618771 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1867) (512y: 1) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 0.14771957969060168 - File "", line 1 - me1=; me2=0.14771957969060168; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955698961392 +Relative difference = 2.9116235141448046e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK 
+FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.884808e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.904943e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.904943e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 +TOTAL : 0.475883 sec +INFO: No Floating Point Exceptions have been reported + 1,154,211,341 cycles # 2.384 GHz + 2,131,381,757 instructions # 1.85 insn per cycle + 0.484642507 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1011) (512y: 5) (512z: 1292) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955262403935 +Relative difference = 3.207154680524219e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git 
a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index d77f223d79..5f533fb3cd 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-17_09:25:33 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:18:10 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.019765e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.955728e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.339790e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 +TOTAL : 0.485800 sec +INFO: No Floating Point Exceptions have been reported + 2,046,836,184 cycles # 2.868 GHz + 2,861,763,521 instructions # 1.40 insn per cycle + 0.770456449 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 86 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.477195e-01 +Avg ME (F77/GPU) = 0.14771956525510177 +Relative difference = 4.4175008557828484e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 9.176079e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.058039e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.058039e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 1.251903 sec +INFO: No Floating Point Exceptions have been reported + 
3,703,836,740 cycles # 2.940 GHz + 9,528,821,992 instructions # 2.57 insn per cycle + 1.260572218 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 367) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956094773486 +Relative difference = 2.643675256627469e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] 
('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.192175e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.322296e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.322296e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 0.587889 sec +INFO: No Floating Point Exceptions have been reported + 1,712,573,858 cycles # 2.874 GHz + 3,991,164,090 instructions # 2.33 insn per cycle + 0.596469979 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1517) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955861942843 +Relative difference = 2.80129187869649e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.984131e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.291891e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.291891e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.460621 sec +INFO: No Floating Point Exceptions have been reported + 1,332,768,943 cycles # 2.844 GHz + 2,539,760,549 instructions # 1.91 insn per cycle + 0.469223881 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1815) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955698961392 +Relative difference = 2.9116235141448046e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception - 766,680,046 cycles:u # 2.357 GHz (75.66%) - 2,418,794 stalled-cycles-frontend:u # 0.32% frontend cycles idle (75.35%) - 7,397,866 stalled-cycles-backend:u # 0.96% backend cycles idle (75.27%) - 1,393,110,146 instructions:u # 1.82 insn per cycle - # 0.01 stalled cycles per insn (73.90%) - 0.363198062 seconds time elapsed +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP 
precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.068338e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.608348e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.608348e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.451660 sec +INFO: No Floating Point Exceptions have been reported + 1,303,705,490 cycles # 2.835 GHz + 2,516,660,988 instructions # 1.93 insn per cycle + 0.460426647 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1776) (512y: 1) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 0.14771957969060168 - File "", line 1 - me1=; me2=0.14771957969060168; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955698961392 +Relative difference = 2.9116235141448046e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK 
+FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.904674e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.952335e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.952335e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 +TOTAL : 0.470523 sec +INFO: No Floating Point Exceptions have been reported + 1,148,816,748 cycles # 2.401 GHz + 2,115,600,264 instructions # 1.84 insn per cycle + 0.478989217 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 906) (512y: 5) (512z: 1273) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955262403935 +Relative difference = 3.207154680524219e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git 
a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index a33a4d4a04..45ada3a90e 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,41 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-17_09:25:35 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:18:21 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception - 766,231,314 cycles:u # 2.212 GHz (74.63%) - 2,355,246 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.38%) - 6,156,803 stalled-cycles-backend:u # 0.80% backend cycles idle (75.25%) - 1,368,516,818 instructions:u # 1.79 insn per cycle - # 0.00 stalled cycles per insn (75.59%) - 0.387224131 seconds time elapsed +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.081665e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.353918e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.800720e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.522074 sec +INFO: No Floating Point Exceptions have been reported + 2,188,417,927 cycles # 2.886 GHz + 3,109,980,535 instructions # 1.42 insn per cycle + 0.814803667 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -43,11 +70,174 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 - File "", line 1 - me1=; me2=0.14771956187351573; reldif=abs((me2-me1)/me1); 
print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +Relative difference = 2.5810037581511336e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 8.910495e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.017920e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.017920e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.334640 sec +INFO: No Floating Point Exceptions have been reported + 3,942,717,867 cycles # 2.929 GHz + 9,888,397,619 instructions # 2.51 insn per cycle + 1.346816311 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956645541506 +Relative difference = 2.270828308707201e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.550381e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.026476e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.026476e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.835395 sec +INFO: No Floating Point Exceptions have been reported + 2,474,407,927 cycles # 2.921 GHz + 6,051,781,084 instructions # 2.45 insn per cycle + 0.847852996 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1410) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956645541506 +Relative difference = 2.270828308707201e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.251869e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.352067e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.352067e+06 ) 
sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.625126 sec +INFO: No Floating Point Exceptions have been reported + 1,795,351,792 cycles # 2.819 GHz + 3,389,782,871 instructions # 1.89 insn per cycle + 0.637929251 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1567) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.324637e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.489814e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.489814e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.609151 sec +INFO: No Floating Point Exceptions have been reported + 1,759,711,411 cycles # 2.834 GHz + 3,345,109,138 instructions # 1.90 insn per cycle + 0.621588850 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1446) (512y: 101) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.155541e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.119089e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.119089e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.645680 sec +INFO: No Floating Point Exceptions have been reported + 1,520,276,942 cycles # 2.311 GHz + 2,512,095,426 instructions # 1.65 insn per cycle + 0.658351218 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 768) (512y: 64) (512z: 1063) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index 534aea9afc..635fef145f 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,41 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' 
HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-17_09:25:38 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:18:34 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception - 791,762,847 cycles:u # 2.283 GHz (74.66%) - 2,396,414 stalled-cycles-frontend:u # 0.30% frontend cycles idle (76.03%) - 5,863,482 stalled-cycles-backend:u # 0.74% backend cycles idle (75.26%) - 1,295,975,849 instructions:u # 1.64 insn per cycle - # 0.00 stalled cycles per insn (74.34%) - 0.383224261 seconds time elapsed +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.163952e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.460728e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.975996e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.521235 sec +INFO: No Floating Point Exceptions have been reported + 2,178,235,183 cycles # 2.877 GHz + 3,088,126,574 instructions # 1.42 insn per cycle + 0.814490194 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -43,11 +70,174 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 - File "", line 1 - me1=; me2=0.14771956187351573; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +Relative difference = 2.5810037581511336e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 8.905010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.017534e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.017534e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.334358 sec +INFO: No Floating Point Exceptions have been reported + 3,930,631,045 cycles # 2.921 GHz + 9,778,615,750 instructions # 2.49 insn per cycle + 1.346795690 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956645541506 +Relative difference = 2.270828308707201e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.520527e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.978156e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.978156e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.848228 sec +INFO: No Floating Point Exceptions have been reported + 2,460,773,168 cycles # 2.862 GHz + 5,993,984,003 instructions # 2.44 insn per cycle + 0.860657174 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1368) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956645541506 +Relative difference = 2.270828308707201e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.233001e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.325225e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.325225e+06 ) 
sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.627322 sec +INFO: No Floating Point Exceptions have been reported + 1,810,072,132 cycles # 2.832 GHz + 3,352,499,816 instructions # 1.85 insn per cycle + 0.639697989 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1483) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.331445e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.508166e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508166e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.606762 sec +INFO: No Floating Point Exceptions have been reported + 1,747,202,335 cycles # 2.825 GHz + 3,316,993,487 instructions # 1.90 insn per cycle + 0.619170203 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1382) (512y: 101) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.144157e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.097547e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.097547e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.647875 sec +INFO: No Floating Point Exceptions have been reported + 1,527,422,709 cycles # 2.315 GHz + 2,496,191,682 instructions # 1.63 insn per cycle + 0.660479795 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 716) (512y: 64) (512z: 1054) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 67f6c64343..0f0996a4b7 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' 
-HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:25:10 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:15:09 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.705596e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.093007e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.806904e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.547454 sec +INFO: No Floating Point Exceptions have been reported + 2,244,643,125 cycles # 2.856 GHz + 3,194,753,552 instructions # 1.42 insn per cycle + 0.844381889 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 
256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015836e+00 +Avg ME (F77/GPU) = 2.0158358666195562 +Relative difference = 6.616631711254798e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.817695e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.865026e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.865026e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.917395 sec +INFO: No Floating Point Exceptions have been reported + 17,420,747,172 cycles # 2.939 GHz + 46,039,408,535 instructions # 2.64 insn per cycle + 5.929443281 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194407 +Relative difference = 6.616637439061751e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.177458e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.337417e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.337417e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.449474 sec +INFO: No Floating Point Exceptions have been reported + 10,167,811,545 cycles # 2.940 GHz + 27,922,488,818 instructions # 2.75 insn per cycle + 3.461267593 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2533) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194411 +Relative difference = 6.616637417031725e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.001375e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.394642e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.394642e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.246102 sec +INFO: No Floating Point Exceptions have been reported + 6,225,214,133 cycles # 2.758 GHz + 12,703,481,596 instructions # 2.04 insn per cycle + 2.257992148 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2620) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe: Floating point exception - 908,994,128 cycles:u # 0.614 GHz (75.10%) - 2,398,681 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.76%) - 6,897,989 stalled-cycles-backend:u # 0.76% backend cycles idle (74.79%) - 1,416,173,298 instructions:u # 1.56 insn per cycle - # 0.00 stalled cycles per insn (74.78%) - 1.536092456 seconds time elapsed +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.452222e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.920256e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.920256e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.070361 sec +INFO: No Floating Point Exceptions have been reported + 5,740,692,800 cycles # 2.758 GHz + 12,120,362,498 instructions # 2.11 insn per cycle + 2.082196362 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2363) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0158358666195553 - File "", line 1 - me1=; me2=2.0158358666195553; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.496236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.681187e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.681187e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.148263 sec +INFO: No Floating Point Exceptions have been reported + 5,893,231,770 cycles # 1.865 GHz + 8,460,083,225 instructions # 1.44 insn per cycle + 3.160116132 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1468) (512y: 122) (512z: 1806) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index 5d3fad6a9a..b863aa4b8d 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:25:13 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:15:34 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.254248e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.331434e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002046e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.532593 sec +INFO: No Floating Point Exceptions have been reported + 2,206,521,572 cycles # 2.874 GHz + 3,181,038,873 instructions # 1.44 insn per cycle + 0.824867346 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015836e+00 +Avg ME (F77/GPU) = 2.0158358666195562 +Relative difference = 6.616631711254798e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.847951e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.897550e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.897550e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.824198 sec +INFO: No Floating Point Exceptions have been reported + 17,074,611,956 cycles # 2.927 GHz + 
45,037,522,622 instructions # 2.64 insn per cycle + 5.835488505 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 566) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194411 +Relative difference = 6.616637417031725e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = 
( 3.339846e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.517622e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.517622e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.286951 sec +INFO: No Floating Point Exceptions have been reported + 9,688,702,526 cycles # 2.938 GHz + 26,805,473,197 instructions # 2.77 insn per cycle + 3.298888236 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2327) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194411 +Relative difference = 6.616637417031725e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.556976e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.882070e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.882070e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.448521 sec +INFO: No Floating Point Exceptions have been reported + 6,771,268,311 cycles # 2.753 GHz + 14,227,806,494 instructions # 2.10 insn per cycle + 2.460277833 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2704) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe: Floating point exception - 848,357,308 cycles:u # 2.281 GHz (74.92%) - 2,367,674 stalled-cycles-frontend:u # 0.28% frontend cycles idle (76.36%) - 5,381,570 stalled-cycles-backend:u # 0.63% backend cycles idle (74.92%) - 1,420,128,933 instructions:u # 1.67 insn per cycle - # 0.00 stalled cycles per insn (74.06%) - 0.407523134 seconds time elapsed +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.776530e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.130549e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.130549e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.341767 sec +INFO: No Floating Point Exceptions have been reported + 6,488,711,878 cycles # 2.758 GHz + 13,822,301,429 instructions # 2.13 insn per cycle + 2.353629315 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2355) (512y: 297) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0158358666195553 - File "", line 1 - me1=; me2=2.0158358666195553; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.365014e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.535721e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.535721e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.263382 sec +INFO: No Floating Point Exceptions have been reported + 6,085,804,948 cycles # 1.859 GHz + 10,219,161,569 instructions # 1.68 insn per cycle + 3.275179492 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1318) (512y: 208) (512z: 1986) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 77bc8d4d9f..cf83c07d47 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:25:16 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:16:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 8.223710e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.732879e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.860967e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 +TOTAL : 0.489964 sec +INFO: No Floating Point Exceptions have been reported + 2,041,886,975 cycles # 2.853 GHz + 2,932,689,889 instructions # 1.44 insn per cycle + 0.773926194 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015841e+00 +Avg ME (F77/GPU) = 2.0158787037944421 +Relative difference = 1.870375413642407e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.929290e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.984521e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.984521e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 +TOTAL : 5.538462 sec +INFO: No Floating Point Exceptions have been reported + 16,282,391,613 cycles # 2.936 GHz + 
45,369,954,990 instructions # 2.79 insn per cycle + 5.546087919 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015849e+00 +Avg ME (F77/C++) = 2.0158491701586172 +Relative difference = 8.441039850630506e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 
4.517678e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.857555e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.857555e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 +TOTAL : 2.425578 sec +INFO: No Floating Point Exceptions have been reported + 7,146,342,805 cycles # 2.938 GHz + 17,820,817,556 instructions # 2.49 insn per cycle + 2.433499088 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3136) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015849e+00 +Avg ME (F77/C++) = 2.0158486895961687 +Relative difference = 1.539816876576819e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.300139e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.447005e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.447005e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.366410 sec +INFO: No Floating Point Exceptions have been reported + 3,812,530,133 cycles # 2.776 GHz + 8,314,531,864 instructions # 2.18 insn per cycle + 1.374237525 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3369) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015847e+00 +Avg ME (F77/C++) = 2.0158474864438176 +Relative difference = 2.4130988992271984e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe: Floating point exception - 777,343,540 cycles:u # 2.344 GHz (76.08%) - 2,487,941 stalled-cycles-frontend:u # 0.32% frontend cycles idle (75.30%) - 6,500,963 stalled-cycles-backend:u # 0.84% backend cycles idle (73.71%) - 1,352,373,320 instructions:u # 1.74 insn per cycle - # 0.00 stalled cycles per insn (74.36%) - 0.368096392 seconds time elapsed +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, 
zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.645853e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.897799e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.897799e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.316285 sec +INFO: No Floating Point Exceptions have been reported + 3,675,425,998 cycles # 2.778 GHz + 7,974,219,247 instructions # 2.17 insn per cycle + 1.323972787 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3213) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0158466693246737 - File "", line 1 - me1=; me2=2.0158466693246737; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015847e+00 +Avg ME (F77/C++) = 2.0158474864438176 +Relative difference = 2.4130988992271984e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, 
zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.513578e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.178216e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.178216e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.714470 sec +INFO: No Floating Point Exceptions have been reported + 3,315,579,741 cycles # 1.925 GHz + 6,150,343,295 instructions # 1.85 insn per cycle + 1.722857238 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2256) (512y: 24) (512z: 2156) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015848e+00 +Avg ME (F77/C++) = 2.0158476348733529 +Relative difference = 1.8112806478434436e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index 269efb388b..60f2dad34a 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,41 +1,198 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:25:18 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:16:21 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 7.969923e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.737348e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.863422e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 +TOTAL : 0.488943 sec +INFO: No Floating Point Exceptions have been reported + 2,058,928,554 cycles # 2.875 GHz + 2,909,617,560 instructions # 1.41 insn per cycle + 0.773317754 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015841e+00 +Avg ME (F77/GPU) = 2.0158787037944421 +Relative difference = 1.870375413642407e-05 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.962212e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.019252e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019252e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 +TOTAL : 5.446348 sec +INFO: No Floating Point Exceptions have been reported + 16,014,544,982 cycles # 2.937 GHz + 
44,474,347,041 instructions # 2.78 insn per cycle + 5.454124254 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 533) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015849e+00 +Avg ME (F77/C++) = 2.0158491701586172 +Relative difference = 8.441039850630506e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 
5.286669e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.759924e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.759924e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 +TOTAL : 2.086643 sec +INFO: No Floating Point Exceptions have been reported + 6,135,728,749 cycles # 2.931 GHz + 17,120,648,230 instructions # 2.79 insn per cycle + 2.094524948 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2863) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015849e+00 +Avg ME (F77/C++) = 2.0158486895961687 +Relative difference = 1.539816876576819e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.052770e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.643067e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.643067e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.835441 sec +INFO: No Floating Point Exceptions have been reported + 5,101,873,696 cycles # 2.769 GHz + 10,273,156,361 instructions # 2.01 insn per cycle + 1.843297684 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3906) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015847e+00 +Avg ME (F77/C++) = 2.0158474864438176 +Relative difference = 2.4130988992271984e-07 +OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe: Floating point exception - 778,744,521 cycles:u # 2.361 GHz (76.78%) - 2,450,015 stalled-cycles-frontend:u # 0.31% frontend cycles idle (74.23%) - 6,812,142 stalled-cycles-backend:u # 0.87% backend cycles idle (74.23%) - 1,389,413,627 instructions:u # 1.78 insn per cycle - # 0.00 stalled cycles per insn (74.40%) - 0.368235043 seconds time elapsed +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, 
zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.133897e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.741009e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.741009e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.812216 sec +INFO: No Floating Point Exceptions have been reported + 5,041,846,676 cycles # 2.771 GHz + 10,042,915,318 instructions # 1.99 insn per cycle + 1.820042823 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3805) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +200,44 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = -Avg ME (F77/GPU) = 2.0158466693246737 - File "", line 1 - me1=; me2=2.0158466693246737; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015847e+00 +Avg ME (F77/C++) = 2.0158474864438176 +Relative difference = 2.4130988992271984e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, 
zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.642094e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.969408e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.969408e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 2.363033 sec +INFO: No Floating Point Exceptions have been reported + 4,430,997,247 cycles # 1.870 GHz + 8,493,309,798 instructions # 1.92 insn per cycle + 2.370917653 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2744) (512y: 4) (512z: 2754) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015848e+00 +Avg ME (F77/C++) = 2.0158476348733529 +Relative difference = 1.8112806478434436e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 1ea27bcdfc..62fab95ac2 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:25:20 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:16:43 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe: Floating point exception - 896,338,448 cycles:u # 2.396 GHz (75.28%) - 2,311,511 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.63%) - 7,798,060 stalled-cycles-backend:u # 0.87% backend cycles idle (75.19%) - 1,412,298,042 instructions:u # 1.58 insn per cycle - # 0.01 stalled cycles per insn (76.03%) - 0.410961939 seconds time elapsed +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.370448e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.371457e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.004604e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.536026 sec +INFO: No Floating Point Exceptions have been reported + 2,214,409,122 cycles # 2.867 GHz + 3,163,292,335 instructions # 1.43 insn per cycle + 0.830149622 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +70,174 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358639104246 - File "", line 1 - me1=; me2=2.0158358639104246; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = 
reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +Relative difference = 6.751024171044779e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.787519e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.833765e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.833765e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 6.014473 sec +INFO: No Floating Point Exceptions have been reported + 17,675,190,561 cycles # 2.934 GHz + 46,198,484,525 instructions # 2.61 insn per cycle + 6.025789457 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359218686011 +Relative difference = 3.8758807327712803e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.209372e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.373008e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.373008e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.414727 sec +INFO: No Floating Point Exceptions have been reported + 10,062,586,014 cycles # 2.937 GHz + 27,715,049,037 instructions # 2.75 insn per cycle + 3.427097999 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359218686011 +Relative difference = 3.8758807327712803e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.030622e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.429639e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.429639e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.231035 sec +INFO: No Floating Point Exceptions have been reported + 6,157,448,048 cycles # 2.747 GHz + 12,606,647,104 instructions # 2.05 insn per cycle + 2.242669652 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2777) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP 
precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.510413e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.987462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.987462e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.049793 sec +INFO: No Floating Point Exceptions have been reported + 5,651,790,254 cycles # 2.742 GHz + 12,043,922,780 instructions # 2.13 insn per cycle + 2.061986198 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2522) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.559881e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.752268e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.752268e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.093339 sec +INFO: No Floating Point Exceptions have been reported + 5,777,925,002 cycles # 1.861 GHz + 8,230,989,757 instructions # 1.42 insn per cycle + 3.105063126 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1866) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index e441da7fac..bde416a886 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,41 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-17_09:25:23 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-15_12:17:08 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe: Floating point exception - 884,580,865 cycles:u # 2.372 GHz (73.62%) - 2,327,348 stalled-cycles-frontend:u # 0.26% frontend cycles idle (73.56%) - 5,617,735 stalled-cycles-backend:u # 0.64% backend cycles idle (75.26%) - 1,436,251,699 instructions:u # 1.62 insn per cycle - # 0.00 stalled cycles per insn (78.09%) - 0.411579403 seconds time elapsed +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.300920e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.324653e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
9.961570e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.533607 sec +INFO: No Floating Point Exceptions have been reported + 2,198,649,887 cycles # 2.849 GHz + 3,081,934,025 instructions # 1.40 insn per cycle + 0.827713906 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -43,11 +70,174 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358639104246 - File "", line 1 - me1=; me2=2.0158358639104246; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) ) - ^ -SyntaxError: invalid syntax +Relative difference = 6.751024171044779e-08 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.850472e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.899391e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.899391e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.814253 sec +INFO: No Floating Point Exceptions have been reported + 17,118,477,102 cycles # 2.939 GHz + 45,207,445,046 instructions # 2.64 insn per cycle + 5.826249043 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359218686011 +Relative difference = 3.8758807327712803e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.320488e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.495447e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.495447e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.305181 sec +INFO: No Floating Point Exceptions have been reported + 9,752,287,704 cycles # 2.941 GHz + 26,369,462,343 instructions # 2.70 insn per cycle + 3.316567159 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359218686011 +Relative difference = 3.8758807327712803e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.466984e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.783441e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.783441e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.496548 sec +INFO: No Floating Point Exceptions have been reported + 6,902,736,140 cycles # 2.753 GHz + 14,146,955,352 instructions # 2.05 insn per cycle + 2.508688639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP 
precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.747061e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.095585e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.095585e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.355880 sec +INFO: No Floating Point Exceptions have been reported + 6,536,932,805 cycles # 2.762 GHz + 13,633,905,312 instructions # 2.09 insn per cycle + 2.367915662 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2531) (512y: 302) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.589352e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.785622e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.785622e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.069165 sec +INFO: No Floating Point Exceptions have been reported + 5,741,871,289 cycles # 1.864 GHz + 9,325,593,834 instructions # 1.62 insn per cycle + 3.081760977 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2059) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED From cf602a1d32a3b6cf302c47815bbc6e8c63a7d6b6 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 10:22:08 +0300 Subject: [PATCH 41/76] [amd] in tput/throughputX.sh expose FPE crash #1003 on HIP and improve error handling --- epochX/cudacpp/tput/throughputX.sh | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index eceb6760e3..41c3c8f83f 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -556,15 +556,24 @@ function cmpExe() { echo "cmpExe $exe1 $args" echo "cmpExe $exef $argsf" if [ "${maketype}" == "-dryrun" ]; then return; fi - tmp=$(mktemp) - me1=$(${exe1} ${args} 2>${tmp} | grep MeanMatrix | awk '{print $4}'); cat ${tmp} - me2=$(${exef} ${argsf} 2>${tmp} | grep Average | awk '{print $4}'); cat ${tmp} if [ "${exe1%%/check_cuda*}" != "${exe1}" ] || [ "${exe1%%/check_hip*}" != "${exe1}" ]; then tag="/GPU)"; else tag="/C++) "; fi + tmp1=$(mktemp) + tmp2=$(mktemp) + if ! 
${exe1} ${args} 2>${tmp2} >${tmp1}; then + echo "ERROR! C++ calculation (C++${tag} failed"; exit 1 # expose FPE crash #1003 on HIP + fi + me1=$(cat ${tmp1} | grep MeanMatrix | awk '{print $4}'); cat ${tmp2} + if ! ${exef} ${argsf} 2>${tmp2} >${tmp1}; then + echo "ERROR! Fortran calculation (F77${tag} failed"; exit 1 + fi + me2=$(cat ${tmp1} | grep Average | awk '{print $4}'); cat ${tmp2} echo -e "Avg ME (C++${tag} = ${me1}\nAvg ME (F77${tag} = ${me2}" if [ "${me2}" == "NaN" ]; then - echo "ERROR! Fortran calculation (F77${tag} returned NaN" + echo "ERROR! Fortran calculation (F77${tag} returned NaN"; exit 1 elif [ "${me2}" == "" ]; then - echo "ERROR! Fortran calculation (F77${tag} crashed" + echo "ERROR! Fortran calculation (F77${tag} crashed"; exit 1 + elif [ "${me1}" == "" ]; then + echo "ERROR! C++ calculation (C++${tag} crashed"; exit 1 else # NB skip python comparison if Fortran returned NaN or crashed, otherwise python returns an error status and the following tests are not executed python3 -c "me1=${me1}; me2=${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 5E-3; print ( '%s (relative difference %s 5E-3)' % ( ('OK','<=') if ok else ('ERROR','>') ) )" 2>&1 From 47a15ab6b7a69b4c4f95c3a81fb6eef490e478ce Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 10:38:52 +0300 Subject: [PATCH 42/76] [amd] in gg_tt.mad cudacpp.mk, try to work around the HIP crashes #1003 by disabling SIMD in C++ objects for HIP builds - it does not help, will revert --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 5ffb286fef..9afd5ea9e0 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -483,6 +483,7 @@ CXXFLAGS += $(OMPFLAGS) # [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is 
not exposed in a macro] # [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] ifeq ($(UNAME_P),ppc64le) + override AVXFLAGSNONE = # no SIMD ifeq ($(BACKEND),cppsse4) override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers) else ifeq ($(BACKEND),cppavx2) @@ -493,6 +494,7 @@ ifeq ($(UNAME_P),ppc64le) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif else ifeq ($(UNAME_P),arm) + override AVXFLAGSNONE = # no SIMD ifeq ($(BACKEND),cppsse4) override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) else ifeq ($(BACKEND),cppavx2) @@ -503,6 +505,7 @@ else ifeq ($(UNAME_P),arm) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 + override AVXFLAGSNONE = -mno-sse3 # no SIMD ifeq ($(BACKEND),cppnone) override AVXFLAGS = -mno-sse3 # no SIMD else ifeq ($(BACKEND),cppsse4) @@ -515,6 +518,7 @@ else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) endif else + override AVXFLAGSNONE = -march=x86-64 # no SIMD (see #588) ifeq ($(BACKEND),cppnone) override AVXFLAGS = -march=x86-64 # no SIMD (see #588) else ifeq ($(BACKEND),cppsse4) @@ -528,8 +532,11 @@ else endif endif # For the moment, use AVXFLAGS everywhere (in C++ builds): eventually, use them only in encapsulated implementations? 
+# Explicitly disable SIMD in the C++ libraries used with CUDA or HIP (work around for HIP crashes #1003) ifeq ($(GPUCC),) CXXFLAGS+= $(AVXFLAGS) +else + CXXFLAGS+= $(AVXFLAGSNONE) endif # Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") From 07e075402c1b20dbd3fcf91f4412a781749393c0 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 10:39:43 +0300 Subject: [PATCH 43/76] [amd] in gg_tt.mad cudacpp.mk, revert the previous commit (1) Revert "[amd] in gg_tt.mad cudacpp.mk, try to work around the HIP crashes #1003 by disabling SIMD in C++ objects for HIP builds - it does not help, will revert" This reverts commit 2fc102767ecc6ae2e95770f4cff18e5c08d31fc1. --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 7 ------- 1 file changed, 7 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 9afd5ea9e0..5ffb286fef 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -483,7 +483,6 @@ CXXFLAGS += $(OMPFLAGS) # [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro] # [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] ifeq ($(UNAME_P),ppc64le) - override AVXFLAGSNONE = # no SIMD ifeq ($(BACKEND),cppsse4) override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers) else ifeq ($(BACKEND),cppavx2) @@ -494,7 +493,6 @@ ifeq ($(UNAME_P),ppc64le) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif else ifeq ($(UNAME_P),arm) - override AVXFLAGSNONE = # no SIMD ifeq ($(BACKEND),cppsse4) override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) else ifeq ($(BACKEND),cppavx2) @@ -505,7 +503,6 @@ else ifeq ($(UNAME_P),arm) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif else ifneq 
($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 - override AVXFLAGSNONE = -mno-sse3 # no SIMD ifeq ($(BACKEND),cppnone) override AVXFLAGS = -mno-sse3 # no SIMD else ifeq ($(BACKEND),cppsse4) @@ -518,7 +515,6 @@ else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) endif else - override AVXFLAGSNONE = -march=x86-64 # no SIMD (see #588) ifeq ($(BACKEND),cppnone) override AVXFLAGS = -march=x86-64 # no SIMD (see #588) else ifeq ($(BACKEND),cppsse4) @@ -532,11 +528,8 @@ else endif endif # For the moment, use AVXFLAGS everywhere (in C++ builds): eventually, use them only in encapsulated implementations? -# Explicitly disable SIMD in the C++ libraries used with CUDA or HIP (work around for HIP crashes #1003) ifeq ($(GPUCC),) CXXFLAGS+= $(AVXFLAGS) -else - CXXFLAGS+= $(AVXFLAGSNONE) endif # Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") From bed013d56b9ff4c4382491eb552e61d911e52be4 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 10:38:52 +0300 Subject: [PATCH 44/76] [amd] in gg_tt.mad cudacpp.mk, try to work around HIP crashes #1003 by disabling SIMD in C++ objects built with hipcc - it also does not help, will revert --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 5ffb286fef..5886e8e791 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -483,6 +483,7 @@ CXXFLAGS += $(OMPFLAGS) # [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro] # [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] ifeq ($(UNAME_P),ppc64le) + override AVXFLAGSNONE = # no SIMD ifeq ($(BACKEND),cppsse4) override AVXFLAGS = -D__SSE4_2__ # Power9 VSX 
with 128 width (VSR registers) else ifeq ($(BACKEND),cppavx2) @@ -493,6 +494,7 @@ ifeq ($(UNAME_P),ppc64le) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif else ifeq ($(UNAME_P),arm) + override AVXFLAGSNONE = # no SIMD ifeq ($(BACKEND),cppsse4) override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) else ifeq ($(BACKEND),cppavx2) @@ -503,6 +505,7 @@ else ifeq ($(UNAME_P),arm) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 + override AVXFLAGSNONE = -mno-sse3 # no SIMD ifeq ($(BACKEND),cppnone) override AVXFLAGS = -mno-sse3 # no SIMD else ifeq ($(BACKEND),cppsse4) @@ -515,6 +518,7 @@ else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) endif else + override AVXFLAGSNONE = -march=x86-64 # no SIMD (see #588) ifeq ($(BACKEND),cppnone) override AVXFLAGS = -march=x86-64 # no SIMD (see #588) else ifeq ($(BACKEND),cppsse4) @@ -528,8 +532,11 @@ else endif endif # For the moment, use AVXFLAGS everywhere (in C++ builds): eventually, use them only in encapsulated implementations? +# Explicitly disable SIMD in the C++ libraries used with HIP and built with hipcc (work around for HIP crashes #1003) ifeq ($(GPUCC),) CXXFLAGS+= $(AVXFLAGS) +else ifneq ($(findstring hipcc,$(GPUCC)),) # FIXME: do this also for nvcc? 
+ GPUFLAGS+= $(AVXFLAGSNONE) endif # Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") From 07845c1e7941909de6cbdba28f51ba6dcf9bde5b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 11:03:26 +0300 Subject: [PATCH 45/76] [amd] in gg_tt.mad cudacpp.mk, revert the previous commit (2) Revert "[amd] in gg_tt.mad cudacpp.mk, try to work around HIP crashes #1003 by disabling SIMD in C++ objects built with hipcc - it also does not help, will revert" This reverts commit 1e225fd7068eb0c67377f55c7e910af945a4d963. --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 7 ------- 1 file changed, 7 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 5886e8e791..5ffb286fef 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -483,7 +483,6 @@ CXXFLAGS += $(OMPFLAGS) # [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro] # [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] ifeq ($(UNAME_P),ppc64le) - override AVXFLAGSNONE = # no SIMD ifeq ($(BACKEND),cppsse4) override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers) else ifeq ($(BACKEND),cppavx2) @@ -494,7 +493,6 @@ ifeq ($(UNAME_P),ppc64le) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment) endif else ifeq ($(UNAME_P),arm) - override AVXFLAGSNONE = # no SIMD ifeq ($(BACKEND),cppsse4) override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) else ifeq ($(BACKEND),cppavx2) @@ -505,7 +503,6 @@ else ifeq ($(UNAME_P),arm) $(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment) endif else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 - override AVXFLAGSNONE = -mno-sse3 # no SIMD ifeq ($(BACKEND),cppnone) override 
AVXFLAGS = -mno-sse3 # no SIMD else ifeq ($(BACKEND),cppsse4) @@ -518,7 +515,6 @@ else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) endif else - override AVXFLAGSNONE = -march=x86-64 # no SIMD (see #588) ifeq ($(BACKEND),cppnone) override AVXFLAGS = -march=x86-64 # no SIMD (see #588) else ifeq ($(BACKEND),cppsse4) @@ -532,11 +528,8 @@ else endif endif # For the moment, use AVXFLAGS everywhere (in C++ builds): eventually, use them only in encapsulated implementations? -# Explicitly disable SIMD in the C++ libraries used with HIP and built with hipcc (work around for HIP crashes #1003) ifeq ($(GPUCC),) CXXFLAGS+= $(AVXFLAGS) -else ifneq ($(findstring hipcc,$(GPUCC)),) # FIXME: do this also for nvcc? - GPUFLAGS+= $(AVXFLAGSNONE) endif # Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") From 14ac1d9ed634553518a8c83e4253f620cd5a5c52 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 11:22:35 +0300 Subject: [PATCH 46/76] [amd] in gg_tt.mad EventStatistics.h, try to work around HIP crashes #1003 by adding volatile - it does not work, will revert --- epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h index 3cc0813354..d35947542c 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h @@ -118,12 +118,13 @@ namespace mg5amcCpu sum.maxME = std::max( s1.maxME, s2.maxME ); sum.minWG = std::min( s1.minWG, s2.minWG ); sum.maxWG = std::max( s1.maxWG, s2.maxWG ); - sum.refME = ( s1.meanME() * s1.nevtOK() + s2.meanME() * s2.nevtOK() ) / sum.nevtOK(); // new mean ME + volatile size_t sum_nevtOK = sum.nevtOK(); + sum.refME = ( s1.meanME() * s1.nevtOK() + 
s2.meanME() * s2.nevtOK() ) / sum_nevtOK; // new mean ME s1.updateRefME( sum.refME ); s2.updateRefME( sum.refME ); sum.sumMEdiff = s1.sumMEdiff + s2.sumMEdiff; sum.sqsMEdiff = s1.sqsMEdiff + s2.sqsMEdiff; - sum.refWG = ( s1.meanWG() * s1.nevtOK() + s2.meanWG() * s2.nevtOK() ) / sum.nevtOK(); // new mean WG + sum.refWG = ( s1.meanWG() * s1.nevtOK() + s2.meanWG() * s2.nevtOK() ) / sum_nevtOK; // new mean WG s1.updateRefWG( sum.refWG ); s2.updateRefWG( sum.refWG ); sum.sumWGdiff = s1.sumWGdiff + s2.sumWGdiff; From 35de4df6ce6264cc2c44929bcce3481cbfce07da Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 11:23:24 +0300 Subject: [PATCH 47/76] [amd] in gg_tt.mad EventStatistics.h, revert the previous commit (1) Revert "[amd] in gg_tt.mad EventStatistics.h, try to work around HIP crashes #1003 by adding volatile - it does not work, will revert" This reverts commit e2591da7b159b6d133a7cff7a4b583a8ad34d563. --- epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h index d35947542c..3cc0813354 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h @@ -118,13 +118,12 @@ namespace mg5amcCpu sum.maxME = std::max( s1.maxME, s2.maxME ); sum.minWG = std::min( s1.minWG, s2.minWG ); sum.maxWG = std::max( s1.maxWG, s2.maxWG ); - volatile size_t sum_nevtOK = sum.nevtOK(); - sum.refME = ( s1.meanME() * s1.nevtOK() + s2.meanME() * s2.nevtOK() ) / sum_nevtOK; // new mean ME + sum.refME = ( s1.meanME() * s1.nevtOK() + s2.meanME() * s2.nevtOK() ) / sum.nevtOK(); // new mean ME s1.updateRefME( sum.refME ); s2.updateRefME( sum.refME ); sum.sumMEdiff = s1.sumMEdiff + s2.sumMEdiff; sum.sqsMEdiff = s1.sqsMEdiff + s2.sqsMEdiff; - sum.refWG = ( s1.meanWG() * s1.nevtOK() + s2.meanWG() * s2.nevtOK() ) / sum_nevtOK; // new 
mean WG + sum.refWG = ( s1.meanWG() * s1.nevtOK() + s2.meanWG() * s2.nevtOK() ) / sum.nevtOK(); // new mean WG s1.updateRefWG( sum.refWG ); s2.updateRefWG( sum.refWG ); sum.sumWGdiff = s1.sumWGdiff + s2.sumWGdiff; From f111828812ded0533660780a72aa74d44fbdf182 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 11:32:02 +0300 Subject: [PATCH 48/76] [amd] in gg_tt.mad EventStatistics.h, work around HIP crashes #1003 by printing out sum.nevtOK() - this avoids teh crash but is not practical, will revert --- epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h index 3cc0813354..b4acfde519 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h @@ -108,6 +108,7 @@ namespace mg5amcCpu // Combine two EventStatistics EventStatistics& operator+=( const EventStatistics& stats ) { + //std::cout << "HALLO0 " << std::endl; EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy EventStatistics& sum = *this; @@ -118,16 +119,21 @@ namespace mg5amcCpu sum.maxME = std::max( s1.maxME, s2.maxME ); sum.minWG = std::min( s1.minWG, s2.minWG ); sum.maxWG = std::max( s1.maxWG, s2.maxWG ); + //std::cout << "HALLO1a " << sum.nevtOK() << std::endl; sum.refME = ( s1.meanME() * s1.nevtOK() + s2.meanME() * s2.nevtOK() ) / sum.nevtOK(); // new mean ME + //std::cout << "HALLO1b " << sum.nevtOK() << std::endl; s1.updateRefME( sum.refME ); s2.updateRefME( sum.refME ); sum.sumMEdiff = s1.sumMEdiff + s2.sumMEdiff; sum.sqsMEdiff = s1.sqsMEdiff + s2.sqsMEdiff; + std::cout << "HALLO2a " << sum.nevtOK() << std::endl; sum.refWG = ( s1.meanWG() * s1.nevtOK() + s2.meanWG() * s2.nevtOK() ) / sum.nevtOK(); // new mean WG + //std::cout << "HALLO2b " << sum.nevtOK() << std::endl; s1.updateRefWG( sum.refWG ); 
s2.updateRefWG( sum.refWG ); sum.sumWGdiff = s1.sumWGdiff + s2.sumWGdiff; sum.sqsWGdiff = s1.sqsWGdiff + s2.sqsWGdiff; + //std::cout << "HALLON " << std::endl; return sum; } // Printout From 42446629b9f433cd90809b1975a1b2ae6d1c1a41 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 11:32:50 +0300 Subject: [PATCH 49/76] [amd] in gg_tt.mad EventStatistics.h, revert the previous commit (2) Revert "[amd] in gg_tt.mad EventStatistics.h, work around HIP crashes #1003 by printing out sum.nevtOK() - this avoids teh crash but is not practical, will revert" This reverts commit 725dae88d89a61d005a0031c9462fe95f4ec6728. --- epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h index b4acfde519..3cc0813354 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h @@ -108,7 +108,6 @@ namespace mg5amcCpu // Combine two EventStatistics EventStatistics& operator+=( const EventStatistics& stats ) { - //std::cout << "HALLO0 " << std::endl; EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy EventStatistics& sum = *this; @@ -119,21 +118,16 @@ namespace mg5amcCpu sum.maxME = std::max( s1.maxME, s2.maxME ); sum.minWG = std::min( s1.minWG, s2.minWG ); sum.maxWG = std::max( s1.maxWG, s2.maxWG ); - //std::cout << "HALLO1a " << sum.nevtOK() << std::endl; sum.refME = ( s1.meanME() * s1.nevtOK() + s2.meanME() * s2.nevtOK() ) / sum.nevtOK(); // new mean ME - //std::cout << "HALLO1b " << sum.nevtOK() << std::endl; s1.updateRefME( sum.refME ); s2.updateRefME( sum.refME ); sum.sumMEdiff = s1.sumMEdiff + s2.sumMEdiff; sum.sqsMEdiff = s1.sqsMEdiff + s2.sqsMEdiff; - std::cout << "HALLO2a " << sum.nevtOK() << std::endl; sum.refWG = ( s1.meanWG() * s1.nevtOK() + s2.meanWG() * s2.nevtOK() ) / sum.nevtOK(); 
// new mean WG - //std::cout << "HALLO2b " << sum.nevtOK() << std::endl; s1.updateRefWG( sum.refWG ); s2.updateRefWG( sum.refWG ); sum.sumWGdiff = s1.sumWGdiff + s2.sumWGdiff; sum.sqsWGdiff = s1.sqsWGdiff + s2.sqsWGdiff; - //std::cout << "HALLON " << std::endl; return sum; } // Printout From 305c78175324a50d43175deaec60e77ba8e61740 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 11:44:45 +0300 Subject: [PATCH 50/76] [amd] in gg_tt.mad and CODEGEN EventStatistics.h, work around FPE crash #1003 on hipcc by disabling optimizations for operator+= --- .../madgraph/iolibs/template_files/gpu/EventStatistics.h | 5 +++++ epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/EventStatistics.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/EventStatistics.h index 3cc0813354..730ea1d697 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/EventStatistics.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/EventStatistics.h @@ -106,6 +106,11 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__((optnone)) +#endif EventStatistics& operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h index 3cc0813354..730ea1d697 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h @@ -106,6 +106,11 @@ namespace 
mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__((optnone)) +#endif EventStatistics& operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy From b4a7b35e51e17c25409066dbeadfe6379660b3ab Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 10:48:17 +0200 Subject: [PATCH 51/76] [amd] in gg_tt.mad and CODEGEN EventStatistics.h, fix clang formatting --- .../madgraph/iolibs/template_files/gpu/EventStatistics.h | 5 +++-- epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/EventStatistics.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/EventStatistics.h index 730ea1d697..58fa8fc273 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/EventStatistics.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/EventStatistics.h @@ -109,9 +109,10 @@ namespace mg5amcCpu #if __HIP_CLANG_ONLY__ // Disable optimizations for this function in HIPCC (work around FPE crash #1003) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization - __attribute__((optnone)) + __attribute__( ( optnone ) ) #endif - EventStatistics& operator+=( const EventStatistics& stats ) + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h index 
730ea1d697..58fa8fc273 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h @@ -109,9 +109,10 @@ namespace mg5amcCpu #if __HIP_CLANG_ONLY__ // Disable optimizations for this function in HIPCC (work around FPE crash #1003) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization - __attribute__((optnone)) + __attribute__( ( optnone ) ) #endif - EventStatistics& operator+=( const EventStatistics& stats ) + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy From 15df2e6fff642237dd6e968f5a7c665d5571c7ca Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 17 Sep 2024 11:03:33 +0200 Subject: [PATCH 52/76] [amd] regenerate all processes with the fix for #1003 --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 14 ++++++------ .../SubProcesses/EventStatistics.h | 8 ++++++- .../CODEGEN_cudacpp_ee_mumu_log.txt | 10 ++++----- .../ee_mumu.sa/SubProcesses/EventStatistics.h | 8 ++++++- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 16 +++++++------- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 ++++----- .../gg_tt.sa/SubProcesses/EventStatistics.h | 8 ++++++- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 16 +++++++------- .../SubProcesses/EventStatistics.h | 8 ++++++- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 16 +++++++------- .../gg_ttg.mad/SubProcesses/EventStatistics.h | 8 ++++++- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 12 +++++----- .../gg_ttg.sa/SubProcesses/EventStatistics.h | 8 ++++++- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 16 +++++++------- .../SubProcesses/EventStatistics.h | 8 ++++++- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 12 +++++----- .../gg_ttgg.sa/SubProcesses/EventStatistics.h | 8 ++++++- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 20 ++++++++--------- .../SubProcesses/EventStatistics.h | 8 ++++++- 
.../CODEGEN_cudacpp_gg_ttggg_log.txt | 14 ++++++------ .../SubProcesses/EventStatistics.h | 8 ++++++- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 20 ++++++++--------- .../gq_ttq.mad/SubProcesses/EventStatistics.h | 8 ++++++- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 8 +++---- .../gq_ttq.sa/SubProcesses/EventStatistics.h | 8 ++++++- .../CODEGEN_mad_heft_gg_bb_log.txt | 8 +++---- .../SubProcesses/EventStatistics.h | 8 ++++++- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 10 ++++----- .../SubProcesses/EventStatistics.h | 8 ++++++- .../CODEGEN_mad_nobm_pp_ttW_log.txt | 16 +++++++------- .../SubProcesses/EventStatistics.h | 8 ++++++- .../CODEGEN_mad_pp_tt012j_log.txt | 22 +++++++++---------- .../SubProcesses/EventStatistics.h | 8 ++++++- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 18 +++++++-------- .../SubProcesses/EventStatistics.h | 8 ++++++- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 14 ++++++------ .../SubProcesses/EventStatistics.h | 8 ++++++- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 14 ++++++------ .../SubProcesses/EventStatistics.h | 8 ++++++- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 12 +++++----- .../SubProcesses/EventStatistics.h | 8 ++++++- .../CODEGEN_mad_susy_gg_tt_log.txt | 14 ++++++------ .../SubProcesses/EventStatistics.h | 8 ++++++- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 10 ++++----- .../SubProcesses/EventStatistics.h | 8 ++++++- 45 files changed, 315 insertions(+), 183 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index c4af716d3e..f2a0bec8db 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -57,7 +57,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058557987213134766  +DEBUG: model prefixing takes 0.005580425262451172  INFO: Restrict model sm with file 
models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -182,19 +182,19 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.071 s +Wrote files for 8 helas calls in 0.072 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.208 s +ALOHA: aloha creates 3 routines in 0.205 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.264 s +ALOHA: aloha creates 7 routines in 0.262 s FFV1 FFV1 FFV2 @@ -234,9 +234,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.098s -user 0m1.838s -sys 0m0.249s +real 0m2.122s +user 0m1.803s +sys 0m0.276s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/ee_mumu.mad/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 2314b032c5..6f7304fc7e 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -57,7 +57,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00559544563293457  +DEBUG: model prefixing takes 0.005712270736694336  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,7 +177,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.277 s +ALOHA: aloha creates 4 routines in 0.281 s FFV1 FFV1 FFV2 @@ -196,7 +196,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. quit -real 0m0.668s -user 0m0.612s +real 0m0.674s +user 0m0.618s sys 0m0.051s -Code generation completed in 1 seconds +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/ee_mumu.sa/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 49092b4162..415d3efd42 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import 
the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00574946403503418  +DEBUG: model prefixing takes 0.005618095397949219  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -183,16 +183,16 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.074 s +Wrote files for 10 helas calls in 0.073 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.150 s +ALOHA: aloha creates 2 routines in 0.151 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.137 s +ALOHA: aloha creates 4 routines in 0.138 s VVV1 FFV1 FFV1 @@ -228,9 +228,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.968s -user 0m1.645s -sys 0m0.279s +real 0m1.928s +user 0m1.656s +sys 0m0.269s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 8ca09600ad..b95abd52b0 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057675838470458984  +DEBUG: model prefixing takes 0.005679607391357422  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -176,7 +176,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.150 s +ALOHA: aloha creates 2 routines in 0.149 s VVV1 FFV1 FFV1 @@ -191,7 +191,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
quit -real 0m0.558s -user 0m0.483s -sys 0m0.061s +real 0m0.548s +user 0m0.489s +sys 0m0.049s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_tt.sa/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 7e07d9fbf2..b71e561310 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005762577056884766  +DEBUG: model prefixing takes 0.005785465240478516  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -204,7 +204,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s -Wrote files for 46 helas calls in 0.192 s +Wrote files for 46 helas calls in 0.194 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -212,14 +212,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.335 s +ALOHA: aloha creates 5 routines in 0.342 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.320 s +ALOHA: aloha creates 10 routines in 0.323 s VVV1 VVV1 FFV1 @@ -267,10 +267,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.668s -user 0m2.323s -sys 0m0.316s -Code generation completed in 2 seconds +real 0m2.663s +user 0m2.360s +sys 0m0.300s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index ece3a1cec9..ca33901987 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057582855224609375  +DEBUG: model prefixing takes 0.005640268325805664  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.023 s Total: 1 processes with 16 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -182,7 +182,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxg DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s Wrote files for 36 helas calls in 0.124 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes @@ -191,14 +191,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.334 s +ALOHA: aloha creates 5 routines in 0.337 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.325 s +ALOHA: aloha creates 10 routines in 0.322 s VVV1 VVV1 FFV1 @@ -239,9 +239,9 @@ Type "launch" to generate events from this process, or see Run "open 
index.html" to see more information about this process. quit -real 0m2.526s -user 0m2.214s -sys 0m0.286s +real 0m2.494s +user 0m2.208s +sys 0m0.279s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_ttg.mad/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index e2ccc79a65..7c45a5037b 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00581049919128418  +DEBUG: model prefixing takes 0.005644083023071289  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,14 +172,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.341 s +ALOHA: aloha creates 5 routines in 0.336 s VVV1 VVV1 FFV1 @@ -199,7 +199,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
quit -real 0m0.837s -user 0m0.751s -sys 0m0.055s +real 0m0.801s +user 0m0.743s +sys 0m0.048s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_ttg.sa/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 5d00f6a26e..07fa30dda4 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005704164505004883  +DEBUG: model prefixing takes 0.0056645870208740234  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -182,8 +182,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxgg DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 
111: 103, 112: 104, 113: 105} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.445 s -Wrote files for 222 helas calls in 0.688 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.441 s +Wrote files for 222 helas calls in 0.691 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -191,14 +191,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.342 s +ALOHA: aloha creates 5 routines in 0.340 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.322 s +ALOHA: aloha creates 10 routines in 0.323 s VVV1 VVV1 FFV1 @@ -242,9 +242,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.947s -user 0m3.631s -sys 0m0.278s +real 0m3.890s +user 0m3.600s +sys 0m0.279s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index f9c6193903..2edbc67f24 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057904720306396484  +DEBUG: model prefixing takes 0.0058209896087646484  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,14 +172,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.436 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.434 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.329 s +ALOHA: aloha creates 5 routines in 0.328 s VVV1 VVV1 FFV1 @@ -202,7 +202,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m1.545s -user 0m1.414s -sys 0m0.054s +real 0m1.484s +user 0m1.408s +sys 0m0.052s Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 2edaf9a3a4..e8aeedc6a7 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005669116973876953  +DEBUG: model prefixing takes 0.005594015121459961  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.938 s +1 processes with 1240 diagrams generated in 1.945 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -184,8 +184,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxggg DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 
182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 
417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 
652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 
889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1547]  DEBUG: 
diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 
218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 
418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 
618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 
818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.779 s -Wrote files for 2281 helas calls in 18.956 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.751 s +Wrote files for 2281 helas calls in 18.901 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -193,14 +193,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.331 s +ALOHA: 
aloha creates 5 routines in 0.328 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.333 s +ALOHA: aloha creates 10 routines in 0.320 s VVV1 VVV1 FFV1 @@ -244,10 +244,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m33.677s -user 0m33.088s -sys 0m0.436s -Code generation completed in 34 seconds +real 0m33.461s +user 0m32.896s +sys 0m0.453s +Code generation completed in 33 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 6b3f5ac37b..ef337c9d60 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -57,7 +57,7 @@ generate g g > 
t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00566554069519043  +DEBUG: model prefixing takes 0.005349636077880859  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.997 s +1 processes with 1240 diagrams generated in 1.935 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -172,7 +172,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.792 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.748 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines @@ -202,7 +202,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m13.498s -user 0m13.348s -sys 0m0.099s -Code generation completed in 14 seconds +real 0m13.381s +user 0m13.197s +sys 0m0.107s +Code generation completed in 13 seconds diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 61ff93de59..8951b7a72a 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005731821060180664  +DEBUG: model prefixing takes 0.005605459213256836  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -165,7 +165,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.081 s +8 processes with 40 diagrams generated in 0.082 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -216,17 +216,17 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxux DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1548]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.168 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s +Wrote files for 32 helas calls in 0.167 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.150 s +ALOHA: aloha creates 2 routines in 0.148 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.137 s +ALOHA: aloha creates 4 routines in 0.136 s FFV1 FFV1 FFV1 @@ -272,10 +272,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.244s -user 0m1.937s -sys 0m0.305s -Code generation completed in 3 seconds +real 0m2.265s +user 0m1.936s +sys 0m0.297s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gq_ttq.mad/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 0100caee86..4d64c445bc 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005804538726806641  +DEBUG: model prefixing takes 0.0055768489837646484  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -208,7 +208,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.149 s +ALOHA: aloha creates 2 routines in 0.147 s FFV1 FFV1 FFV1 @@ -224,7 +224,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. quit -real 0m0.662s -user 0m0.608s +real 0m0.758s +user 0m0.605s sys 0m0.050s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/gq_ttq.sa/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 33cc2a6d99..5cf2a423bb 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -162,13 +162,13 @@ ALOHA: aloha 
creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.273 s +ALOHA: aloha creates 4 routines in 0.272 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.259 s +ALOHA: aloha creates 8 routines in 0.260 s VVS3 VVV1 FFV1 @@ -206,8 +206,8 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.048s -user 0m1.889s +real 0m3.072s +user 0m1.915s sys 0m0.292s Code generation completed in 3 seconds ************************************************************ diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index ebc8a90b6f..e6d3f8224e 100644 --- 
a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -150,7 +150,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.270 s +ALOHA: aloha creates 4 routines in 0.266 s VVS3 VVV1 FFV1 @@ -167,7 +167,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. quit -real 0m0.812s -user 0m0.602s -sys 0m0.046s -Code generation completed in 1 seconds +real 0m0.664s +user 0m0.596s +sys 0m0.042s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt index 3a41d39d53..c12c75b63f 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt 
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F import model sm-no_b_mass INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005716800689697266  +DEBUG: model prefixing takes 0.005651950836181641  INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -222,7 +222,7 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~ INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g -12 processes with 144 diagrams generated in 0.665 s +12 processes with 144 diagrams generated in 0.668 s Total: 16 processes with 152 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -354,18 +354,18 @@ INFO: Finding symmetric diagrams for subprocess group dux_ttxwm DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548]  Generated helas calls for 8 subprocesses (76 diagrams) in 0.209 s -Wrote files for 212 helas calls in 0.839 s +Wrote files for 212 helas calls in 0.845 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 3 routines in 0.207 s +ALOHA: aloha creates 3 routines in 0.212 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 6 routines in 0.204 s +ALOHA: aloha creates 6 
routines in 0.209 s FFV1 FFV1 FFV1 @@ -461,9 +461,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m4.738s -user 0m4.183s -sys 0m0.543s +real 0m4.772s +user 0m4.213s +sys 0m0.546s Code generation completed in 5 seconds ************************************************************ * * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 2d56b04fff..bd8247c682 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005767345428466797  +DEBUG: model prefixing takes 0.005708217620849609  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -207,7 +207,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.142 s +13 processes with 76 diagrams generated in 0.146 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -373,7 +373,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.881 s +65 processes with 1119 diagrams generated in 1.940 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -691,8 +691,8 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttx DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1548]  -Generated helas calls for 18 subprocesses (372 diagrams) in 1.320 s -Wrote files for 810 helas calls in 2.822 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.324 s +Wrote files for 810 helas calls in 2.828 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -700,14 +700,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: 
aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.345 s +ALOHA: aloha creates 5 routines in 0.346 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.323 s +ALOHA: aloha creates 10 routines in 0.324 s VVV1 VVV1 FFV1 @@ -885,10 +885,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m10.728s -user 0m9.718s -sys 0m0.942s -Code generation completed in 11 seconds +real 0m10.789s +user 0m9.833s +sys 0m0.922s +Code generation completed in 10 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index f51c5375c2..4dd9535986 100644 --- 
a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -72,7 +72,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14151597023010254  +DEBUG: model prefixing takes 0.141585111618042  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -87,7 +87,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.797 s +1 processes with 72 diagrams generated in 3.818 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -119,8 +119,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxttx DEBUG: len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 
49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s -Wrote files for 119 helas calls in 0.397 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.194 s +Wrote files for 119 helas calls in 0.400 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -128,14 +128,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.326 s +ALOHA: aloha creates 5 routines in 0.332 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.336 s +ALOHA: aloha creates 10 routines in 0.341 s VVV5 VVV5 FFV1 @@ -176,9 +176,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m7.305s -user 0m7.006s -sys 0m0.283s +real 0m7.352s +user 0m7.058s +sys 0m0.278s Code generation completed in 7 seconds ************************************************************ * * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index 536b88812d..bf8ab5b11a 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -72,7 +72,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to 
ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14238429069519043  +DEBUG: model prefixing takes 0.14227843284606934  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -87,7 +87,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.783 s +1 processes with 72 diagrams generated in 3.806 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -109,14 +109,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.200 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.327 s +ALOHA: aloha creates 5 routines in 0.326 s VVV5 VVV5 FFV1 @@ -136,7 +136,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
quit -real 0m5.220s -user 0m5.128s -sys 0m0.069s +real 0m5.225s +user 0m5.127s +sys 0m0.077s Code generation completed in 5 seconds diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 3c66cbb0ec..08a8394380 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.127 s +1 processes with 6 diagrams generated in 0.128 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -582,18 +582,18 @@ INFO: Finding symmetric diagrams for subprocess group gg_t1t1x DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s -Wrote files for 16 helas calls in 0.084 s +Wrote files for 16 helas calls in 0.085 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.188 s +ALOHA: aloha creates 3 routines in 0.191 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.187 s +ALOHA: aloha creates 6 routines in 0.191 s VVV1 VSS1 VSS1 @@ -630,9 +630,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.115s -user 0m2.723s -sys 0m0.305s +real 0m3.058s +user 0m2.745s +sys 0m0.312s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 8c46a18101..2549db0cb0 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.126 s +1 processes with 6 diagrams generated in 0.127 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Load PLUGIN.CUDACPP_OUTPUT @@ -576,7 +576,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.190 s +ALOHA: aloha creates 3 routines in 0.189 s VVV1 VSS1 VSS1 @@ -592,7 +592,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. quit -real 0m1.453s -user 0m1.294s -sys 0m0.055s -Code generation completed in 2 seconds +real 0m1.386s +user 0m1.303s +sys 0m0.058s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = 
stats; // temporary copy diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index fafdc9960e..ec83eacc71 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.120 s +1 processes with 3 diagrams generated in 0.122 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -582,16 +582,16 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.076 s +Wrote files for 10 helas calls in 0.077 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.140 s +ALOHA: aloha creates 2 routines in 0.143 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.137 s +ALOHA: aloha creates 4 routines in 0.138 s VVV1 FFV1 FFV1 @@ -627,9 +627,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.929s -user 0m2.609s -sys 0m0.281s +real 0m2.928s +user 0m2.618s +sys 0m0.290s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index b4451876ff..824274004a 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -575,7 +575,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.143 s VVV1 FFV1 FFV1 @@ -590,7 +590,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. quit -real 0m1.335s -user 0m1.264s -sys 0m0.052s -Code generation completed in 1 seconds +real 0m1.321s +user 0m1.235s +sys 0m0.071s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/EventStatistics.h index 3cc0813354..58fa8fc273 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/EventStatistics.h @@ -106,7 +106,13 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics - EventStatistics& operator+=( const EventStatistics& stats ) +#if __HIP_CLANG_ONLY__ + // Disable optimizations for this function in HIPCC (work around FPE crash #1003) + // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization + __attribute__( ( optnone ) ) +#endif + EventStatistics& + operator+=( const EventStatistics& stats ) { EventStatistics s1 = *this; // temporary copy EventStatistics s2 = stats; // temporary copy From 9ccc0d70fc20761e78ba28220f00aa92a41769b0 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 18 Sep 2024 07:20:25 +0200 Subject: [PATCH 53/76] [gcc14] in gg_tt.mad and CODEGEN mgOnGpuVectors.h, distinguish between const and non-const operator[] in cxtype_v (fix build error #1004 on gcc14.2) --- .../madgraph/iolibs/template_files/gpu/mgOnGpuVectors.h | 5 +++-- epochX/cudacpp/gg_tt.mad/src/mgOnGpuVectors.h | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuVectors.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuVectors.h +++ 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! ** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { From c0a3dc620f99bf1dde632adff74fb3cca646f66e Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 18 Sep 2024 07:30:27 +0200 Subject: [PATCH 54/76] [gcc14] in gg_tt.mad and CODEGEN mgOnGpuCxtypes.h, clarify that cxtype_ref is a const reference to two non-const fp variables --- .../iolibs/template_files/gpu/mgOnGpuCxtypes.h | 11 ++++++----- epochX/cudacpp/gg_tt.mad/src/mgOnGpuCxtypes.h | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuCxtypes.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( 
const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& 
) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types From c6c6234415ca02bd4297086be2265e1c474244fa Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 18 Sep 2024 09:17:54 +0200 Subject: [PATCH 55/76] [clang] in gg_tt.mad and CODEGEN EventStatistics.h, work around FPE crash #1005 on clang16 by disabling optimizations for operator+= This extends to any clang the previous workaround for #1003 which had been defined only for HIP clang --- .../madgraph/iolibs/template_files/gpu/EventStatistics.h | 5 +++-- epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/EventStatistics.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/EventStatistics.h +++ 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif From 55dcb6b8a8104c764e200525d52360848b4c2a5b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 18 Sep 2024 09:56:34 +0200 Subject: [PATCH 56/76] [clang] regenerate all processes with fixes for clang16 FPE #1005 and for gcc142 cxtype_ref #1003 --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 12 +++++----- .../SubProcesses/EventStatistics.h | 5 ++-- 
.../cudacpp/ee_mumu.mad/src/mgOnGpuCxtypes.h | 11 +++++---- .../cudacpp/ee_mumu.mad/src/mgOnGpuVectors.h | 5 ++-- .../CODEGEN_cudacpp_ee_mumu_log.txt | 12 +++++----- .../ee_mumu.sa/SubProcesses/EventStatistics.h | 5 ++-- .../cudacpp/ee_mumu.sa/src/mgOnGpuCxtypes.h | 11 +++++---- .../cudacpp/ee_mumu.sa/src/mgOnGpuVectors.h | 5 ++-- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 12 +++++----- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 6 ++--- .../gg_tt.sa/SubProcesses/EventStatistics.h | 5 ++-- epochX/cudacpp/gg_tt.sa/src/mgOnGpuCxtypes.h | 11 +++++---- epochX/cudacpp/gg_tt.sa/src/mgOnGpuVectors.h | 5 ++-- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 16 ++++++------- .../SubProcesses/EventStatistics.h | 5 ++-- .../cudacpp/gg_tt01g.mad/src/mgOnGpuCxtypes.h | 11 +++++---- .../cudacpp/gg_tt01g.mad/src/mgOnGpuVectors.h | 5 ++-- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 14 +++++------ .../gg_ttg.mad/SubProcesses/EventStatistics.h | 5 ++-- .../cudacpp/gg_ttg.mad/src/mgOnGpuCxtypes.h | 11 +++++---- .../cudacpp/gg_ttg.mad/src/mgOnGpuVectors.h | 5 ++-- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 14 +++++------ .../gg_ttg.sa/SubProcesses/EventStatistics.h | 5 ++-- epochX/cudacpp/gg_ttg.sa/src/mgOnGpuCxtypes.h | 11 +++++---- epochX/cudacpp/gg_ttg.sa/src/mgOnGpuVectors.h | 5 ++-- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 18 +++++++------- .../SubProcesses/EventStatistics.h | 5 ++-- .../cudacpp/gg_ttgg.mad/src/mgOnGpuCxtypes.h | 11 +++++---- .../cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h | 5 ++-- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 12 +++++----- .../gg_ttgg.sa/SubProcesses/EventStatistics.h | 5 ++-- .../cudacpp/gg_ttgg.sa/src/mgOnGpuCxtypes.h | 11 +++++---- .../cudacpp/gg_ttgg.sa/src/mgOnGpuVectors.h | 5 ++-- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 20 ++++++++-------- .../SubProcesses/EventStatistics.h | 5 ++-- .../cudacpp/gg_ttggg.mad/src/mgOnGpuCxtypes.h | 11 +++++---- .../cudacpp/gg_ttggg.mad/src/mgOnGpuVectors.h | 5 ++-- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 
14 +++++------ .../SubProcesses/EventStatistics.h | 5 ++-- .../cudacpp/gg_ttggg.sa/src/mgOnGpuCxtypes.h | 11 +++++---- .../cudacpp/gg_ttggg.sa/src/mgOnGpuVectors.h | 5 ++-- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 20 ++++++++-------- .../gq_ttq.mad/SubProcesses/EventStatistics.h | 5 ++-- .../cudacpp/gq_ttq.mad/src/mgOnGpuCxtypes.h | 11 +++++---- .../cudacpp/gq_ttq.mad/src/mgOnGpuVectors.h | 5 ++-- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 12 +++++----- .../gq_ttq.sa/SubProcesses/EventStatistics.h | 5 ++-- epochX/cudacpp/gq_ttq.sa/src/mgOnGpuCxtypes.h | 11 +++++---- epochX/cudacpp/gq_ttq.sa/src/mgOnGpuVectors.h | 5 ++-- .../CODEGEN_mad_heft_gg_bb_log.txt | 10 ++++---- .../SubProcesses/EventStatistics.h | 5 ++-- .../heft_gg_bb.mad/src/mgOnGpuCxtypes.h | 11 +++++---- .../heft_gg_bb.mad/src/mgOnGpuVectors.h | 5 ++-- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 10 ++++---- .../SubProcesses/EventStatistics.h | 5 ++-- .../heft_gg_bb.sa/src/mgOnGpuCxtypes.h | 11 +++++---- .../heft_gg_bb.sa/src/mgOnGpuVectors.h | 5 ++-- .../CODEGEN_mad_nobm_pp_ttW_log.txt | 20 ++++++++-------- .../SubProcesses/EventStatistics.h | 5 ++-- .../nobm_pp_ttW.mad/src/mgOnGpuCxtypes.h | 11 +++++---- .../nobm_pp_ttW.mad/src/mgOnGpuVectors.h | 5 ++-- .../CODEGEN_mad_pp_tt012j_log.txt | 24 +++++++++---------- .../SubProcesses/EventStatistics.h | 5 ++-- .../pp_tt012j.mad/src/mgOnGpuCxtypes.h | 11 +++++---- .../pp_tt012j.mad/src/mgOnGpuVectors.h | 5 ++-- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 20 ++++++++-------- .../SubProcesses/EventStatistics.h | 5 ++-- .../smeft_gg_tttt.mad/src/mgOnGpuCxtypes.h | 11 +++++---- .../smeft_gg_tttt.mad/src/mgOnGpuVectors.h | 5 ++-- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 14 +++++------ .../SubProcesses/EventStatistics.h | 5 ++-- .../smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h | 11 +++++---- .../smeft_gg_tttt.sa/src/mgOnGpuVectors.h | 5 ++-- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 12 +++++----- .../SubProcesses/EventStatistics.h | 5 ++-- 
.../susy_gg_t1t1.mad/src/mgOnGpuCxtypes.h | 11 +++++---- .../susy_gg_t1t1.mad/src/mgOnGpuVectors.h | 5 ++-- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 6 ++--- .../SubProcesses/EventStatistics.h | 5 ++-- .../susy_gg_t1t1.sa/src/mgOnGpuCxtypes.h | 11 +++++---- .../susy_gg_t1t1.sa/src/mgOnGpuVectors.h | 5 ++-- .../CODEGEN_mad_susy_gg_tt_log.txt | 12 +++++----- .../SubProcesses/EventStatistics.h | 5 ++-- .../susy_gg_tt.mad/src/mgOnGpuCxtypes.h | 11 +++++---- .../susy_gg_tt.mad/src/mgOnGpuVectors.h | 5 ++-- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 12 +++++----- .../SubProcesses/EventStatistics.h | 5 ++-- .../susy_gg_tt.sa/src/mgOnGpuCxtypes.h | 11 +++++---- .../susy_gg_tt.sa/src/mgOnGpuVectors.h | 5 ++-- 89 files changed, 425 insertions(+), 359 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index f2a0bec8db..6010e696c6 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -57,7 +57,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005580425262451172  +DEBUG: model prefixing takes 0.005605936050415039  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -182,7 +182,7 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.072 s +Wrote files for 8 helas calls in 0.071 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines @@ -194,7 +194,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.262 s +ALOHA: aloha creates 7 routines in 0.260 s FFV1 FFV1 FFV2 @@ -234,9 +234,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.122s -user 0m1.803s -sys 0m0.276s +real 0m2.087s +user 0m1.815s +sys 0m0.264s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/ee_mumu.mad/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: 
cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/ee_mumu.mad/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 6f7304fc7e..7c6ab02bd5 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -57,7 +57,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005712270736694336  +DEBUG: model prefixing takes 0.005640983581542969  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,7 +177,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.281 s +ALOHA: aloha creates 4 routines in 0.282 s FFV1 FFV1 FFV2 @@ -196,7 +196,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
quit -real 0m0.674s -user 0m0.618s -sys 0m0.051s -Code generation completed in 0 seconds +real 0m0.764s +user 0m0.619s +sys 0m0.055s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/ee_mumu.sa/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const 
cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/ee_mumu.sa/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 415d3efd42..2551473a78 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005618095397949219  +DEBUG: model prefixing takes 0.005740642547607422  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -188,11 +188,11 @@ Wrote files for 10 helas calls in 0.073 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.151 s +ALOHA: aloha creates 2 routines in 0.152 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.138 s +ALOHA: aloha creates 4 routines in 0.137 s VVV1 FFV1 FFV1 @@ -228,9 +228,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.928s -user 0m1.656s -sys 0m0.269s +real 0m1.933s +user 0m1.680s +sys 0m0.251s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index b95abd52b0..c29b1e2433 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005679607391357422  +DEBUG: model prefixing takes 0.005649089813232422  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -176,7 +176,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.149 s +ALOHA: aloha creates 2 routines in 0.147 s VVV1 FFV1 FFV1 @@ -191,7 +191,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. 
quit -real 0m0.548s +real 0m0.547s user 0m0.489s sys 0m0.049s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_tt.sa/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/gg_tt.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_tt.sa/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/gg_tt.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_tt.sa/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + 
cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/gg_tt.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_tt.sa/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/gg_tt.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_tt.sa/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index b71e561310..7c0904d06f 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005785465240478516  +DEBUG: model prefixing takes 0.0056819915771484375  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -204,7 +204,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s -Wrote files for 46 helas calls in 0.194 s +Wrote files for 46 helas calls in 0.191 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -212,14 +212,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.342 s +ALOHA: aloha creates 5 routines in 0.337 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.323 s +ALOHA: aloha creates 10 routines in 0.318 s VVV1 VVV1 FFV1 @@ -267,10 +267,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.663s -user 0m2.360s -sys 0m0.300s -Code generation completed in 3 seconds +real 0m2.650s +user 0m2.337s +sys 0m0.308s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class 
cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_tt01g.mad/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index ca33901987..18ad3844a6 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005640268325805664  +DEBUG: model prefixing takes 0.005638837814331055  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -182,8 +182,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxg DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s -Wrote files for 36 helas calls in 0.124 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +Wrote files for 36 helas calls in 0.126 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -198,7 +198,7 @@ ALOHA: aloha creates FFV1 
routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.322 s +ALOHA: aloha creates 10 routines in 0.321 s VVV1 VVV1 FFV1 @@ -239,9 +239,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.494s -user 0m2.208s -sys 0m0.279s +real 0m2.513s +user 0m2.242s +sys 0m0.259s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_ttg.mad/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The 
cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_ttg.mad/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does 
not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! ** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 7c45a5037b..1c2396d45a 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005644083023071289  +DEBUG: model prefixing takes 0.005800962448120117  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.023 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -172,14 +172,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.336 s +ALOHA: aloha creates 5 routines in 0.334 s VVV1 VVV1 FFV1 @@ -199,7 +199,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. 
quit -real 0m0.801s -user 0m0.743s -sys 0m0.048s +real 0m0.804s +user 0m0.744s +sys 0m0.051s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_ttg.sa/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& 
) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_ttg.sa/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 07fa30dda4..77dd9d1a0d 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056645870208740234  +DEBUG: model prefixing takes 0.0057544708251953125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.163 s +1 processes with 123 diagrams generated in 0.164 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -182,8 +182,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxgg DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 
70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.441 s -Wrote files for 222 helas calls in 0.691 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.443 s +Wrote files for 222 helas calls in 0.683 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -191,14 +191,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.340 s +ALOHA: aloha creates 5 routines in 0.345 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.323 s +ALOHA: aloha creates 10 routines in 0.326 s VVV1 VVV1 FFV1 @@ -242,9 +242,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.890s -user 0m3.600s -sys 0m0.279s +real 0m3.912s +user 0m3.607s +sys 0m0.294s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: 
cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 2edbc67f24..208ddd63e8 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058209896087646484  +DEBUG: model prefixing takes 0.005644083023071289  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,14 +172,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
-Generated helas calls for 1 subprocesses (123 diagrams) in 0.434 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.440 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.328 s +ALOHA: aloha creates 5 routines in 0.330 s VVV1 VVV1 FFV1 @@ -202,7 +202,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. quit -real 0m1.484s -user 0m1.408s -sys 0m0.052s +real 0m1.489s +user 0m1.419s +sys 0m0.057s Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuCxtypes.h +++ 
b/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? 
No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_ttgg.sa/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index e8aeedc6a7..a3493e03ce 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005594015121459961  +DEBUG: model prefixing takes 0.005598545074462891  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.945 s +1 processes with 1240 diagrams generated in 1.940 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -184,8 +184,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxggg DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 
182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 
417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 
652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 
889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1547]  DEBUG: 
diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 
218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 
418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 
618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 
818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.751 s -Wrote files for 2281 helas calls in 18.901 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.735 s +Wrote files for 2281 helas calls in 18.815 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -193,14 +193,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.328 s +ALOHA: 
aloha creates 5 routines in 0.327 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.320 s +ALOHA: aloha creates 10 routines in 0.321 s VVV1 VVV1 FFV1 @@ -244,10 +244,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m33.461s -user 0m32.896s -sys 0m0.453s -Code generation completed in 33 seconds +real 0m33.389s +user 0m32.805s +sys 0m0.479s +Code generation completed in 34 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu 
namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? 
No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_ttggg.mad/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index ef337c9d60..4c13616b50 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005349636077880859  +DEBUG: model prefixing takes 0.005697011947631836  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.935 s +1 processes with 1240 diagrams generated in 1.926 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -172,14 +172,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.748 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.807 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.358 s +ALOHA: aloha creates 5 routines in 0.362 s VVV1 VVV1 FFV1 @@ -202,7 +202,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m13.381s -user 0m13.197s -sys 0m0.107s +real 0m13.431s +user 0m13.266s +sys 0m0.108s Code generation completed in 13 seconds diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - 
cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gg_ttggg.sa/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 8951b7a72a..573fe7ee40 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005605459213256836  +DEBUG: model prefixing takes 0.005885124206542969  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -165,7 +165,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.082 s +8 processes with 40 diagrams generated in 0.081 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -216,17 +216,17 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxux DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1548]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s -Wrote files for 32 helas calls in 0.167 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Wrote files for 32 helas calls in 0.170 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.148 s +ALOHA: aloha creates 2 routines in 0.154 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.136 s +ALOHA: aloha creates 4 routines in 0.141 s FFV1 FFV1 FFV1 @@ -272,10 +272,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.265s -user 0m1.936s -sys 0m0.297s -Code generation completed in 2 seconds +real 0m2.271s +user 0m1.947s +sys 0m0.318s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/gq_ttq.mad/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ 
-712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gq_ttq.mad/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index 4d64c445bc..ae4eb0c582 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0055768489837646484  +DEBUG: model prefixing takes 0.005564212799072266  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -208,7 +208,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.147 s +ALOHA: aloha creates 2 routines in 0.151 s FFV1 FFV1 FFV1 @@ -224,7 +224,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
quit -real 0m0.758s -user 0m0.605s -sys 0m0.050s -Code generation completed in 1 seconds +real 0m1.469s +user 0m0.619s +sys 0m0.058s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/gq_ttq.sa/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const 
cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/gq_ttq.sa/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 5cf2a423bb..e8641c274f 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -162,13 +162,13 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.272 s +ALOHA: aloha creates 4 routines in 0.275 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.260 s +ALOHA: aloha creates 8 routines in 0.259 s VVS3 VVV1 FFV1 @@ -206,10 +206,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.072s -user 0m1.915s +real 0m2.195s +user 0m1.897s sys 0m0.292s -Code generation completed in 3 seconds +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype 
class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/heft_gg_bb.mad/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index e6d3f8224e..10bc8ea3f2 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -150,7 +150,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.266 s +ALOHA: aloha creates 4 routines in 0.270 s VVS3 VVV1 FFV1 @@ -167,7 +167,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
quit -real 0m0.664s -user 0m0.596s -sys 0m0.042s -Code generation completed in 0 seconds +real 0m0.659s +user 0m0.618s +sys 0m0.032s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = 
delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/heft_gg_bb.sa/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt index c12c75b63f..0ea4d26630 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F import model sm-no_b_mass INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005651950836181641  +DEBUG: model prefixing takes 0.0058252811431884766  INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ INFO: Process u~ d > t t~ w- added to mirror process d u~ > t t~ w- INFO: Process c~ s > t t~ w- added to mirror process s c~ > t t~ w- INFO: Process d~ u > t t~ w+ added to mirror process u d~ > t t~ w+ INFO: Process s~ c > t t~ w+ added to mirror process c s~ > t t~ w+ -4 processes with 8 diagrams generated in 0.112 s +4 processes with 8 diagrams generated in 0.111 s Total: 4 processes with 8 diagrams add process p p > t t~ w j @1 INFO: Checking for minimal orders which gives processes. 
@@ -222,7 +222,7 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~ INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g -12 processes with 144 diagrams generated in 0.668 s +12 processes with 144 diagrams generated in 0.665 s Total: 16 processes with 152 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -353,19 +353,19 @@ INFO: Finding symmetric diagrams for subprocess group dux_ttxwm DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548]  -Generated helas calls for 8 subprocesses (76 diagrams) in 0.209 s -Wrote files for 212 helas calls in 0.845 s +Generated helas calls for 8 subprocesses (76 diagrams) in 0.207 s +Wrote files for 212 helas calls in 0.855 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 3 routines in 0.212 s +ALOHA: aloha creates 3 routines in 0.208 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 6 routines in 0.209 s +ALOHA: aloha creates 6 routines in 0.207 s FFV1 FFV1 FFV1 @@ -461,9 +461,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m4.772s -user 0m4.213s -sys 0m0.546s +real 0m4.799s +user 0m4.206s +sys 0m0.574s Code generation completed in 5 seconds ************************************************************ * * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ 
namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index bd8247c682..e090fc68c6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005708217620849609  +DEBUG: model prefixing takes 0.00563502311706543  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -167,7 +167,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.031 s +5 processes with 7 diagrams generated in 0.030 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. 
@@ -207,7 +207,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.146 s +13 processes with 76 diagrams generated in 0.139 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -373,7 +373,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.940 s +65 processes with 1119 diagrams generated in 1.865 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -691,8 +691,8 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttx DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1548]  -Generated helas calls for 18 subprocesses (372 diagrams) in 1.324 s -Wrote files for 810 helas calls in 2.828 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.314 s +Wrote files for 810 helas calls in 2.832 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -700,14 +700,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.346 s +ALOHA: aloha creates 5 
routines in 0.347 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.324 s +ALOHA: aloha creates 10 routines in 0.321 s VVV1 VVV1 FFV1 @@ -885,10 +885,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m10.789s -user 0m9.833s -sys 0m0.922s -Code generation completed in 10 seconds +real 0m10.708s +user 0m9.682s +sys 0m0.994s +Code generation completed in 11 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace 
mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? 
No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/pp_tt012j.mad/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index 4dd9535986..204ade8c71 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -72,7 +72,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.141585111618042  +DEBUG: model prefixing takes 0.1429440975189209  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -87,7 +87,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.818 s +1 processes with 72 diagrams generated in 3.791 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -119,8 +119,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxttx DEBUG: len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 
49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (72 diagrams) in 0.194 s -Wrote files for 119 helas calls in 0.400 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.198 s +Wrote files for 119 helas calls in 0.408 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -128,14 +128,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.332 s +ALOHA: aloha creates 5 routines in 0.326 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.341 s +ALOHA: aloha creates 10 routines in 0.339 s VVV5 VVV5 FFV1 @@ -176,10 +176,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m7.352s -user 0m7.058s -sys 0m0.278s -Code generation completed in 7 seconds +real 0m7.621s +user 0m7.014s +sys 0m0.313s +Code generation completed in 8 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an 
fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index bf8ab5b11a..7bf06a0cbc 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -72,7 +72,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14227843284606934  +DEBUG: model prefixing takes 0.14331912994384766  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -87,7 +87,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.806 s +1 processes with 72 diagrams generated in 3.821 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -109,14 +109,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.191 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.326 s +ALOHA: aloha creates 5 routines in 0.325 s VVV5 VVV5 FFV1 @@ -136,7 +136,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. quit -real 0m5.225s -user 0m5.127s -sys 0m0.077s +real 0m5.255s +user 0m5.156s +sys 0m0.069s Code generation completed in 5 seconds diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git 
a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? 
No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 08a8394380..2949288e67 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -582,18 +582,18 @@ INFO: Finding symmetric diagrams for subprocess group gg_t1t1x DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s -Wrote files for 16 helas calls in 0.085 s +Wrote files for 16 helas calls in 0.084 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.191 s +ALOHA: aloha creates 3 routines in 0.188 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.191 s +ALOHA: aloha creates 6 routines in 0.186 s VVV1 VSS1 VSS1 @@ -630,9 +630,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.058s -user 0m2.745s -sys 0m0.312s +real 0m3.043s +user 0m2.728s +sys 0m0.310s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 
@@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index 2549db0cb0..a659f844a2 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -592,7 +592,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
quit -real 0m1.386s -user 0m1.303s -sys 0m0.058s +real 0m1.529s +user 0m1.309s +sys 0m0.053s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const 
cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index ec83eacc71..79fe1a685b 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.122 s +1 processes with 3 diagrams generated in 0.126 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -582,7 +582,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.077 s +Wrote files for 10 helas calls in 0.079 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 @@ -591,7 +591,7 @@ ALOHA: aloha creates 2 routines in 0.143 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 
0.138 s +ALOHA: aloha creates 4 routines in 0.139 s VVV1 FFV1 FFV1 @@ -627,9 +627,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.928s -user 0m2.618s -sys 0m0.290s +real 0m3.036s +user 0m2.643s +sys 0m0.301s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/EventStatistics.h b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuCxtypes.h b/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK 
(originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuVectors.h b/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/susy_gg_tt.mad/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index 824274004a..e18785a5e5 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.121 s +1 processes with 3 diagrams generated in 0.122 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -575,7 +575,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.141 s VVV1 FFV1 FFV1 @@ -590,7 +590,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
quit -real 0m1.321s -user 0m1.235s -sys 0m0.071s -Code generation completed in 2 seconds +real 0m1.376s +user 0m1.241s +sys 0m0.064s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/EventStatistics.h b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/EventStatistics.h index 58fa8fc273..0857275ae4 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/EventStatistics.h +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/EventStatistics.h @@ -106,8 +106,9 @@ namespace mg5amcCpu , sqsWGdiff( 0 ) , tag( "" ) {} // Combine two EventStatistics -#if __HIP_CLANG_ONLY__ - // Disable optimizations for this function in HIPCC (work around FPE crash #1003) +#ifdef __clang__ + // Disable optimizations for this function in HIP (work around FPE crash #1003: originally using #if __HIP_CLANG_ONLY__) + // Disable optimizations for this function in clang tout court (work around FPE crash #1005: now using #ifdef __clang__) // See https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-selectively-disabling-optimization __attribute__( ( optnone ) ) #endif diff --git a/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuCxtypes.h b/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuCxtypes.h index 58a16fec8a..92d74fd6db 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuCxtypes.h +++ b/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuCxtypes.h @@ -704,7 +704,7 @@ namespace mg5amcGpu namespace mg5amcCpu #endif { - // The cxtype_ref class (a non-const reference to two fp variables) was originally designed for cxtype_v::operator[] + // The cxtype_ref class (a const reference to two non-const fp variables) was originally designed for cxtype_v::operator[] // It used to be included in the code only when MGONGPU_HAS_CPPCXTYPEV_BRK (originally MGONGPU_HAS_CPPCXTYPE_REF) is defined // It is now always included in the code because it is needed also to access an fptype wavefunction buffer as a cxtype class cxtype_ref @@ -712,9 +712,9 @@ namespace mg5amcCpu public: cxtype_ref() = 
delete; cxtype_ref( const cxtype_ref& ) = delete; - cxtype_ref( cxtype_ref&& ) = default; // copy refs + cxtype_ref( cxtype_ref&& ) = default; // copy const refs __host__ __device__ cxtype_ref( fptype& r, fptype& i ) - : m_preal( &r ), m_pimag( &i ) {} // copy refs + : m_preal( &r ), m_pimag( &i ) {} // copy (create from) const refs cxtype_ref& operator=( const cxtype_ref& ) = delete; //__host__ __device__ cxtype_ref& operator=( cxtype_ref&& c ) {...} // REMOVED! Should copy refs or copy values? No longer needed in cxternary __host__ __device__ cxtype_ref& operator=( const cxtype& c ) @@ -722,10 +722,11 @@ namespace mg5amcCpu *m_preal = cxreal( c ); *m_pimag = cximag( c ); return *this; - } // copy values + } // copy (assign) non-const values __host__ __device__ operator cxtype() const { return cxmake( *m_preal, *m_pimag ); } private: - fptype *m_preal, *m_pimag; // RI + fptype* const m_preal; // const pointer to non-const fptype R + fptype* const m_pimag; // const pointer to non-const fptype I }; // Printout to stream for user defined types diff --git a/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuVectors.h b/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuVectors.h index 1693fcea7c..9f3533a875 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuVectors.h +++ b/epochX/cudacpp/susy_gg_tt.sa/src/mgOnGpuVectors.h @@ -104,8 +104,9 @@ namespace mg5amcCpu #ifdef MGONGPU_HAS_CPPCXTYPEV_BRK // NB: THIS IS THE FUNDAMENTAL DIFFERENCE BETWEEN MGONGPU_HAS_CPPCXTYPEV_BRK DEFINED AND NOT DEFINED // NB: the alternative "clang" implementation is simpler: it simply does not have any bracket operator[] - // NB: ** do NOT implement operator[] to return a value: it does not fail the build (why?) and gives unexpected results! 
** - cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } + //cxtype_ref operator[]( size_t i ) const { return cxtype_ref( m_real[i], m_imag[i] ); } // gcc14.2 build fails #1004 + cxtype_ref operator[]( size_t i ) { return cxtype_ref( m_real[i], m_imag[i] ); } + cxtype operator[]( size_t i ) const { return cxtype( m_real[i], m_imag[i] ); } #endif const fptype_v& real() const { From a647b4bf53cb26591d04ec78bca9719a298995e0 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 18 Sep 2024 17:50:37 +0200 Subject: [PATCH 57/76] [clang] rerun 102 tput tests on itscrd90 - all ok STARTED AT Wed Sep 18 10:03:30 AM CEST 2024 ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean ENDED(1) AT Wed Sep 18 12:28:45 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean ENDED(2) AT Wed Sep 18 12:49:20 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean ENDED(3) AT Wed Sep 18 12:58:32 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ENDED(4) AT Wed Sep 18 01:01:21 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst ENDED(5) AT Wed Sep 18 01:04:08 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common ENDED(6) AT Wed Sep 18 01:07:00 PM CEST 2024 [Status=0] ./tput/teeThroughputX.sh -mix -hrd -makej -susyggtt -susyggt1t1 -smeftggtttt -heftggbb -makeclean ENDED(7) AT Wed Sep 18 01:38:02 PM CEST 2024 [Status=0] --- .../log_eemumu_mad_d_inl0_hrd0.txt | 172 ++++++++-------- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 172 ++++++++-------- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 172 ++++++++-------- .../log_eemumu_mad_d_inl0_hrd0_curhst.txt | 172 ++++++++-------- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 172 ++++++++-------- .../log_eemumu_mad_d_inl0_hrd1.txt | 170 ++++++++-------- 
.../log_eemumu_mad_d_inl1_hrd0.txt | 172 ++++++++-------- .../log_eemumu_mad_d_inl1_hrd1.txt | 170 ++++++++-------- .../log_eemumu_mad_f_inl0_hrd0.txt | 172 ++++++++-------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 172 ++++++++-------- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 172 ++++++++-------- .../log_eemumu_mad_f_inl0_hrd0_curhst.txt | 172 ++++++++-------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 172 ++++++++-------- .../log_eemumu_mad_f_inl0_hrd1.txt | 172 ++++++++-------- .../log_eemumu_mad_f_inl1_hrd0.txt | 170 ++++++++-------- .../log_eemumu_mad_f_inl1_hrd1.txt | 172 ++++++++-------- .../log_eemumu_mad_m_inl0_hrd0.txt | 172 ++++++++-------- .../log_eemumu_mad_m_inl0_hrd1.txt | 172 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd0.txt | 168 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 168 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 168 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd0_curhst.txt | 168 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 168 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd1.txt | 170 ++++++++-------- .../log_ggtt_mad_d_inl1_hrd0.txt | 172 ++++++++-------- .../log_ggtt_mad_d_inl1_hrd1.txt | 172 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd0.txt | 172 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 172 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 172 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd0_curhst.txt | 172 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 172 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd1.txt | 166 ++++++++-------- .../log_ggtt_mad_f_inl1_hrd0.txt | 182 ++++++++--------- .../log_ggtt_mad_f_inl1_hrd1.txt | 176 ++++++++-------- .../log_ggtt_mad_m_inl0_hrd0.txt | 172 ++++++++-------- .../log_ggtt_mad_m_inl0_hrd1.txt | 164 +++++++-------- .../log_ggttg_mad_d_inl0_hrd0.txt | 184 ++++++++--------- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 184 ++++++++--------- .../log_ggttg_mad_d_inl0_hrd1.txt | 186 ++++++++--------- 
.../log_ggttg_mad_f_inl0_hrd0.txt | 184 ++++++++--------- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 184 ++++++++--------- .../log_ggttg_mad_f_inl0_hrd1.txt | 186 ++++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 184 ++++++++--------- .../log_ggttg_mad_m_inl0_hrd1.txt | 184 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 184 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 184 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 184 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_curhst.txt | 184 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 184 ++++++++--------- .../log_ggttgg_mad_d_inl0_hrd1.txt | 184 ++++++++--------- .../log_ggttgg_mad_d_inl1_hrd0.txt | 188 +++++++++--------- .../log_ggttgg_mad_d_inl1_hrd1.txt | 188 +++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 184 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 184 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 184 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_curhst.txt | 184 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 184 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd1.txt | 182 ++++++++--------- .../log_ggttgg_mad_f_inl1_hrd0.txt | 188 +++++++++--------- .../log_ggttgg_mad_f_inl1_hrd1.txt | 188 +++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 184 ++++++++--------- .../log_ggttgg_mad_m_inl0_hrd1.txt | 184 ++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 160 +++++++-------- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 160 +++++++-------- .../log_ggttggg_mad_d_inl0_hrd1.txt | 160 +++++++-------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 160 +++++++-------- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 160 +++++++-------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 158 +++++++-------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 160 +++++++-------- .../log_ggttggg_mad_m_inl0_hrd1.txt | 162 +++++++-------- .../log_gqttq_mad_d_inl0_hrd0.txt | 182 ++++++++--------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 
182 ++++++++--------- .../log_gqttq_mad_d_inl0_hrd1.txt | 180 ++++++++--------- .../log_gqttq_mad_f_inl0_hrd0.txt | 184 ++++++++--------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 184 ++++++++--------- .../log_gqttq_mad_f_inl0_hrd1.txt | 186 ++++++++--------- .../log_gqttq_mad_m_inl0_hrd0.txt | 188 +++++++++--------- .../log_gqttq_mad_m_inl0_hrd1.txt | 188 +++++++++--------- .../log_heftggbb_mad_d_inl0_hrd0.txt | 168 ++++++++-------- .../log_heftggbb_mad_d_inl0_hrd1.txt | 172 ++++++++-------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 168 ++++++++-------- .../log_heftggbb_mad_f_inl0_hrd1.txt | 168 ++++++++-------- .../log_heftggbb_mad_m_inl0_hrd0.txt | 168 ++++++++-------- .../log_heftggbb_mad_m_inl0_hrd1.txt | 172 ++++++++-------- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 188 +++++++++--------- .../log_smeftggtttt_mad_d_inl0_hrd1.txt | 188 +++++++++--------- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 186 ++++++++--------- .../log_smeftggtttt_mad_f_inl0_hrd1.txt | 186 ++++++++--------- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 184 ++++++++--------- .../log_smeftggtttt_mad_m_inl0_hrd1.txt | 184 ++++++++--------- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 164 +++++++-------- .../log_susyggt1t1_mad_d_inl0_hrd1.txt | 166 ++++++++-------- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 166 ++++++++-------- .../log_susyggt1t1_mad_f_inl0_hrd1.txt | 168 ++++++++-------- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 166 ++++++++-------- .../log_susyggt1t1_mad_m_inl0_hrd1.txt | 168 ++++++++-------- .../log_susyggtt_mad_d_inl0_hrd0.txt | 168 ++++++++-------- .../log_susyggtt_mad_d_inl0_hrd1.txt | 170 ++++++++-------- .../log_susyggtt_mad_f_inl0_hrd0.txt | 172 ++++++++-------- .../log_susyggtt_mad_f_inl0_hrd1.txt | 166 ++++++++-------- .../log_susyggtt_mad_m_inl0_hrd0.txt | 172 ++++++++-------- .../log_susyggtt_mad_m_inl0_hrd1.txt | 164 +++++++-------- 102 files changed, 8931 insertions(+), 8931 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 1292ed24b8..95eb3e309d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:08:03 +DATE: 2024-09-18_12:08:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.330379e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.527996e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.788543e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.586175e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.543752e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.774580e+08 ) 
sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.812638 sec +TOTAL : 0.722428 sec INFO: No Floating Point Exceptions have been reported - 2,711,766,628 cycles # 2.867 GHz - 4,239,903,132 instructions # 1.56 insn per cycle - 1.138564764 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,618,484,542 cycles # 2.848 GHz + 4,056,431,697 instructions # 1.55 insn per cycle + 1.017935073 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.032481e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.205909e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.205909e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.036688e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.208211e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.208211e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.541164 sec +TOTAL : 6.477862 sec INFO: No Floating Point Exceptions have been reported - 19,214,248,144 cycles # 2.933 GHz - 46,179,436,349 instructions # 2.40 insn per cycle - 6.552095575 seconds time elapsed + 19,055,513,200 cycles # 2.940 GHz + 46,088,548,361 instructions # 2.42 insn per cycle + 6.483409710 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.566602e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.052859e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.052859e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.577995e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.056243e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.056243e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.470271 sec +TOTAL : 4.401175 sec INFO: No Floating Point Exceptions have been reported - 13,145,357,361 cycles # 2.934 GHz - 31,720,883,797 instructions # 2.41 insn per cycle - 4.481479023 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) + 12,945,161,675 cycles # 2.938 GHz + 31,621,534,754 instructions # 2.44 insn per cycle + 4.406822784 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.961947e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.743984e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.743984e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.979178e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.760192e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.760192e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.647893 sec +TOTAL : 3.589167 sec INFO: No Floating Point Exceptions have been reported - 10,212,054,728 cycles # 2.792 GHz - 19,686,910,587 instructions # 1.93 insn per cycle - 3.658422867 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) + 10,070,726,803 cycles # 2.802 GHz + 19,587,544,877 instructions # 1.94 insn per cycle + 3.594697986 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.012892e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.837375e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.837375e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.973756e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.755912e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.755912e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.568887 sec +TOTAL : 3.601272 sec INFO: No Floating Point Exceptions have been reported - 10,042,390,879 cycles # 2.806 GHz - 19,342,891,969 instructions # 1.93 insn per cycle - 3.579550757 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) + 9,893,708,282 cycles # 2.744 GHz + 19,261,714,155 instructions # 1.95 insn per cycle + 3.606677205 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.687611e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.233598e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.233598e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.684138e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.223088e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.223088e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.179012 sec +TOTAL : 4.152604 sec INFO: No Floating Point Exceptions have been reported - 8,766,087,418 cycles # 2.093 GHz - 15,826,503,490 instructions # 1.81 insn per cycle - 4.190350116 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) + 8,635,892,874 cycles # 2.077 GHz + 15,755,316,929 instructions # 1.82 insn per cycle + 4.158382190 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 656f6e2f98..7e1127db04 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:45:05 +DATE: 2024-09-18_12:51:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.206256e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.682542e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.682542e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.746451e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.921944e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.921944e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.457281 sec +TOTAL : 2.223700 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 7,617,652,543 cycles # 2.830 GHz - 12,995,599,451 instructions # 1.71 insn per cycle - 2.778749807 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge + 7,220,060,160 cycles # 2.915 GHz + 13,018,391,047 instructions # 1.80 insn per cycle + 2.533250665 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -71,7 +71,7 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -79,35 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.578808e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.114252e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.114252e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.004941e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.165788e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.165788e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 7.297311 sec +TOTAL : 6.898046 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 20,775,260,342 cycles # 2.853 GHz - 46,581,102,942 instructions # 2.24 insn per cycle - 7.320240357 seconds time elapsed + 20,322,657,427 cycles # 2.944 GHz + 46,321,216,193 instructions # 2.28 insn per cycle + 6.904944789 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -115,33 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.428539e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.841895e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.841895e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.496333e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.925589e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.925589e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.145544 sec +TOTAL : 4.836326 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 14,656,531,148 cycles # 2.850 GHz - 32,719,868,481 instructions # 2.23 insn per cycle - 5.168221991 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) + 14,226,515,518 cycles # 2.937 GHz + 32,466,683,813 instructions # 2.28 insn per cycle + 4.843971134 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -149,33 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.782197e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.438081e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.438081e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.825666e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.487837e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.487837e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.265326 sec +TOTAL : 4.083516 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 11,667,988,214 cycles # 2.728 GHz - 21,208,810,330 instructions # 1.82 insn per cycle - 4.287433901 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) + 11,316,310,914 cycles # 2.767 GHz + 20,951,601,246 instructions # 1.85 insn per cycle + 4.090897830 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -183,33 +183,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.824257e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.501728e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.501728e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.895357e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.603837e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.603837e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.177936 sec +TOTAL : 3.954670 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 11,439,960,592 cycles # 2.728 GHz - 20,869,154,642 instructions # 1.82 insn per cycle - 4.198165961 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) + 11,188,953,637 cycles # 2.824 GHz + 20,622,311,623 instructions # 1.84 insn per cycle + 3.962452110 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -217,33 +217,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.555883e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.022583e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.022583e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.623904e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.111036e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.111036e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.778023 sec +TOTAL : 4.511201 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 10,270,194,102 cycles # 2.143 GHz - 17,125,695,085 instructions # 1.67 insn per cycle - 4.797944534 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) + 9,933,844,941 cycles # 2.199 GHz + 16,904,875,780 instructions # 1.70 insn per cycle + 4.518707685 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -251,8 +251,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index c883b5b3b2..d91c4828d9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:57:26 +DATE: 2024-09-18_13:04:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.026987e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.683583e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.857936e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.531377e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.591267e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.748328e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.462390 sec +TOTAL : 1.353084 sec INFO: No Floating Point Exceptions have been reported - 4,897,119,539 cycles # 2.892 GHz - 7,502,819,293 instructions # 1.53 insn per cycle - 1.751403177 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common + 4,633,251,875 cycles # 2.904 GHz + 7,212,974,866 instructions # 1.56 insn per cycle + 1.652016166 
seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.029304e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.201451e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.201451e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.029394e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.199449e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.199449e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 7.039422 sec +TOTAL : 6.897530 sec INFO: No Floating Point Exceptions have been reported - 20,621,489,655 cycles # 2.926 GHz - 46,653,049,885 instructions # 2.26 insn per cycle - 7.049549267 seconds time elapsed + 20,162,123,319 cycles # 2.922 GHz + 46,195,009,239 instructions # 2.29 insn per cycle + 6.903032860 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.563131e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.046448e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.046448e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.570184e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.049072e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.049072e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.950021 sec +TOTAL : 4.798502 sec INFO: No Floating Point Exceptions have been reported - 14,503,935,115 cycles # 2.925 GHz - 32,091,166,775 instructions # 2.21 insn per cycle - 4.960008276 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) + 14,063,092,419 cycles # 2.928 GHz + 31,626,728,543 instructions # 2.25 insn per cycle + 4.804471582 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.961733e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.745410e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.745410e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.970535e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.757812e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.757812e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.125348 sec +TOTAL : 3.978190 sec INFO: No Floating Point Exceptions have been reported - 11,625,768,108 cycles # 2.812 GHz - 19,969,403,537 instructions # 1.72 insn per cycle - 4.135325887 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) + 11,201,119,802 cycles # 2.813 GHz + 19,490,103,913 instructions # 1.74 insn per cycle + 3.984105389 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.013648e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.832922e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.832922e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.023963e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.847631e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.847631e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.042328 sec +TOTAL : 3.897854 sec INFO: No Floating Point Exceptions have been reported - 11,410,026,502 cycles # 2.817 GHz - 19,423,165,232 instructions # 1.70 insn per cycle - 4.052144921 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) + 11,011,148,409 cycles # 2.821 GHz + 18,950,488,449 instructions # 1.72 insn per cycle + 3.903822013 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.712486e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.267405e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.267405e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.727909e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.291192e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.291192e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.597004 sec +TOTAL : 4.434907 sec INFO: No Floating Point Exceptions have been reported - 10,166,885,638 cycles # 2.209 GHz - 15,890,691,650 instructions # 1.56 insn per cycle - 4.606740948 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) + 9,769,161,551 cycles # 2.200 GHz + 15,456,644,765 instructions # 1.58 insn per cycle + 4.440874371 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index be71099caf..bb28d7f936 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:54:39 +DATE: 2024-09-18_13:01:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.198136e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.691819e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.863296e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.590069e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.635013e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.812541e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.975349 sec +TOTAL : 0.989483 sec INFO: No Floating Point Exceptions have been reported - 3,491,422,658 cycles # 2.884 GHz - 6,978,748,855 instructions # 2.00 insn per cycle - 1.269405520 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst + 3,546,265,877 cycles # 2.891 GHz + 7,041,909,652 instructions # 1.99 insn per cycle + 1.283490715 
seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.025076e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.196021e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.196021e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.026273e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.198046e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.198046e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.587511 sec +TOTAL : 6.545718 sec INFO: No Floating Point Exceptions have been reported - 19,326,799,797 cycles # 2.930 GHz - 46,201,035,622 instructions # 2.39 insn per cycle - 6.597179522 seconds time elapsed + 19,118,940,916 cycles # 2.919 GHz + 46,090,671,775 instructions # 2.41 insn per cycle + 6.551190549 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.561064e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.042427e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.042427e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.571851e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.052236e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.052236e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.487009 sec +TOTAL : 4.417688 sec INFO: No Floating Point Exceptions have been reported - 13,122,812,956 cycles # 2.919 GHz - 31,726,329,549 instructions # 2.42 insn per cycle - 4.497261752 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) + 12,968,655,547 cycles # 2.932 GHz + 31,622,331,959 instructions # 2.44 insn per cycle + 4.423489151 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.964017e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.742650e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.742650e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.966543e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.746878e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.746878e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.652673 sec +TOTAL : 3.610850 sec INFO: No Floating Point Exceptions have been reported - 10,238,914,237 cycles # 2.798 GHz - 19,709,087,829 instructions # 1.92 insn per cycle - 3.662709498 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) + 10,121,576,066 cycles # 2.799 GHz + 19,587,082,892 instructions # 1.94 insn per cycle + 3.616728345 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.987318e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.812393e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.812393e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.021109e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.843929e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.843929e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.613935 sec +TOTAL : 3.520824 sec INFO: No Floating Point Exceptions have been reported - 10,087,966,707 cycles # 2.786 GHz - 19,370,511,659 instructions # 1.92 insn per cycle - 3.624175302 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) + 9,901,674,637 cycles # 2.808 GHz + 19,249,331,839 instructions # 1.94 insn per cycle + 3.526811564 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.729073e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.292942e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.292942e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.723287e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.278399e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.278399e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.086984 sec +TOTAL : 4.062783 sec INFO: No Floating Point Exceptions have been reported - 8,757,365,492 cycles # 2.138 GHz - 15,836,859,575 instructions # 1.81 insn per cycle - 4.097082998 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) + 8,637,222,753 cycles # 2.123 GHz + 15,755,461,061 instructions # 1.82 insn per cycle + 4.068672997 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 1574c6c3cf..95f355ef67 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,60 +11,60 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:51:49 +DATE: 2024-09-18_12:58:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.996142e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.634832e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.796610e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.092066e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.598729e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.734259e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.900321 sec +TOTAL : 1.884241 sec INFO: No Floating Point Exceptions have been reported - 6,197,801,426 cycles # 2.910 GHz - 11,411,789,503 instructions # 1.84 insn per cycle - 2.187318710 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst + 6,167,226,842 cycles # 2.911 
GHz + 11,436,463,316 instructions # 1.85 insn per cycle + 2.174841291 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -72,33 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 
1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.025883e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.199963e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.199963e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.035778e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.207383e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.207383e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.587430 sec +TOTAL : 6.487731 sec INFO: No Floating Point Exceptions have been reported - 19,281,256,149 cycles # 2.928 GHz - 46,192,094,635 instructions # 2.40 insn per cycle - 6.597986195 seconds time elapsed + 19,058,569,596 cycles # 2.936 GHz + 46,087,741,277 instructions # 2.42 insn per cycle + 6.493592711 seconds time elapsed 
=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -106,31 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.548679e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.019540e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.019540e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.566016e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.044387e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.044387e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.521143 sec +TOTAL : 4.436390 sec INFO: No Floating Point Exceptions have been reported - 13,278,378,525 cycles # 2.931 GHz - 31,736,760,460 instructions # 2.39 insn per cycle - 4.531295049 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) + 12,971,922,098 cycles # 2.921 GHz + 31,622,790,809 instructions # 2.44 insn per cycle + 4.442502369 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -138,31 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.952371e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.722642e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.722642e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.978030e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.768932e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.768932e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.672885 sec +TOTAL : 3.592451 sec INFO: No Floating Point Exceptions have been reported - 10,228,932,843 cycles # 2.778 GHz - 19,706,958,837 instructions # 1.93 insn per cycle - 3.682647007 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) + 10,115,574,971 cycles # 2.812 GHz + 19,587,420,856 instructions # 1.94 insn per cycle + 3.598300355 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -170,31 +170,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.003393e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.813864e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.813864e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.014830e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.827477e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.827477e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.587790 sec +TOTAL : 3.531675 sec INFO: No Floating Point Exceptions have been reported - 10,033,694,863 cycles # 2.790 GHz - 19,370,562,804 instructions # 1.93 insn per cycle - 3.597832664 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) + 9,897,196,547 cycles # 2.799 GHz + 19,249,419,683 instructions # 1.94 insn per cycle + 3.537559003 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -202,31 +202,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.709775e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 2.272417e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.272417e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.720646e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.279247e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.279247e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.131241 sec +TOTAL : 4.067635 sec INFO: No Floating Point Exceptions have been reported - 8,787,275,470 cycles # 2.123 GHz - 15,836,849,319 instructions # 1.80 insn per cycle - 4.141346630 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) + 8,664,270,263 cycles # 2.127 GHz + 15,755,691,110 instructions # 1.82 insn per cycle + 4.073643316 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -234,8 +234,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 3b02782d45..e73a9b015a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:08:35 +DATE: 2024-09-18_12:09:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.564401e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.700588e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.875805e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.079594e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.670378e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.825463e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.684524 sec +TOTAL : 0.680250 sec INFO: No Floating Point Exceptions have been reported - 2,669,304,279 cycles # 2.860 GHz - 4,081,785,887 instructions # 1.53 insn per cycle - 0.991787338 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,578,534,884 cycles # 2.821 GHz + 4,030,538,684 instructions # 1.56 insn per cycle + 0.973967444 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.030249e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.204005e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.204005e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.022698e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.191211e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.191211e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.555775 sec +TOTAL : 6.567027 sec INFO: No Floating Point Exceptions have been reported - 19,264,579,235 cycles # 2.934 GHz - 46,142,725,089 instructions # 2.40 insn per cycle - 6.567398103 seconds time elapsed + 19,075,762,627 cycles # 2.903 GHz + 46,055,106,551 instructions # 2.41 insn per cycle + 6.572547698 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.561230e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.041001e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.041001e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.585711e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.070341e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.070341e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.486484 sec +TOTAL : 4.383967 sec INFO: No Floating Point Exceptions have been reported - 13,180,735,522 cycles # 2.931 GHz - 31,698,753,932 instructions # 2.40 insn per cycle - 4.497601224 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1649) (avx2: 0) (512y: 0) (512z: 0) + 12,890,625,740 cycles # 2.937 GHz + 31,557,909,117 instructions # 2.45 insn per cycle + 4.389588631 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1648) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.959717e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.742175e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.742175e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.969969e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.755961e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.755961e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.661759 sec +TOTAL : 3.604328 sec INFO: No Floating Point Exceptions have been reported - 10,296,251,645 cycles # 2.804 GHz - 19,686,624,933 instructions # 1.91 insn per cycle - 3.673145773 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1895) (512y: 0) (512z: 0) + 10,100,174,359 cycles # 2.799 GHz + 19,576,296,506 instructions # 1.94 insn per cycle + 3.609879791 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1894) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.002735e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.826033e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.826033e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.022206e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.841390e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.841390e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.593177 sec +TOTAL : 3.520245 sec INFO: No Floating Point Exceptions have been reported - 10,082,197,083 cycles # 2.798 GHz - 19,384,360,663 instructions # 1.92 insn per cycle - 3.604587412 seconds time elapsed + 9,894,539,917 cycles # 2.807 GHz + 19,271,397,768 instructions # 1.95 insn per cycle + 3.525910639 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1636) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.753760e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.344313e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.344313e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.762660e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.347769e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.347769e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.042847 sec +TOTAL : 3.982657 sec INFO: No Floating Point Exceptions have been reported - 8,657,274,459 cycles # 2.136 GHz - 15,708,080,882 instructions # 1.81 insn per cycle - 4.054289402 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 834) (512y: 156) (512z: 1237) + 8,470,289,841 cycles # 2.124 GHz + 15,587,855,124 instructions # 1.84 insn per cycle + 3.988212621 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 833) (512y: 153) (512z: 1240) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 9adc226af5..8184b4eff2 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:33:55 +DATE: 2024-09-18_12:42:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.203471e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.505439e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.793875e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.357145e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.547980e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.727026e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.692895 sec +TOTAL : 0.689616 sec INFO: No Floating Point Exceptions have been reported - 2,667,104,303 cycles # 2.870 GHz - 4,197,568,068 instructions # 1.57 insn per cycle - 0.991023921 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 + 2,681,392,745 cycles # 2.885 GHz + 4,097,806,151 instructions # 1.53 insn per cycle + 0.986657014 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.609773e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.065122e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.065122e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.608983e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.060555e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.060555e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.323744 sec +TOTAL : 4.321503 sec INFO: No Floating Point Exceptions have been reported - 12,680,899,676 cycles # 2.930 GHz - 32,573,373,461 instructions # 2.57 insn per cycle - 4.329822925 seconds time elapsed + 12,686,452,587 cycles # 2.933 GHz + 32,573,246,433 instructions # 2.57 insn per cycle + 4.326967751 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 281) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.020941e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.881765e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.881765e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.001283e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.839506e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.839506e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.528541 sec +TOTAL : 3.560441 sec INFO: No Floating Point Exceptions have been reported - 10,343,960,768 cycles # 2.928 GHz - 24,660,363,232 instructions # 2.38 insn per cycle - 3.534351751 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) + 10,462,099,873 cycles # 2.934 GHz + 24,899,188,532 instructions # 2.38 insn per cycle + 3.566316228 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1246) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.219408e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.252697e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.252697e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.199006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.213700e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.213700e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.248651 sec +TOTAL : 3.271279 sec INFO: No Floating Point Exceptions have been reported - 9,122,079,188 cycles # 2.804 GHz - 16,949,443,243 instructions # 1.86 insn per cycle - 3.254977824 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1616) (512y: 0) (512z: 0) + 9,171,998,387 cycles # 2.800 GHz + 16,835,147,245 instructions # 1.84 insn per cycle + 3.276861848 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1599) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.281631e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.372235e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.372235e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.270242e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.359980e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.359980e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.167737 sec +TOTAL : 3.184078 sec INFO: No Floating Point Exceptions have been reported - 8,922,630,281 cycles # 2.812 GHz - 16,368,012,425 instructions # 1.83 insn per cycle - 3.174211351 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1352) (512y: 139) (512z: 0) + 8,899,793,398 cycles # 2.791 GHz + 16,396,706,280 instructions # 1.84 insn per cycle + 3.189617083 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1355) (512y: 139) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.953386e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.685128e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.685128e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.962735e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.715557e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.715557e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.635921 sec +TOTAL : 3.614098 sec INFO: No Floating Point Exceptions have been reported - 7,907,839,436 cycles # 2.172 GHz - 14,593,864,068 instructions # 1.85 insn per cycle - 3.642895717 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1003) (512y: 158) (512z: 955) + 7,891,427,724 cycles # 2.181 GHz + 14,556,226,424 instructions # 1.84 insn per cycle + 3.619718707 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1003) (512y: 158) (512z: 946) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index a111e191c2..a7c1b0753b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:34:21 +DATE: 2024-09-18_12:42:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.369824e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.600320e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.803756e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.653794e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.579157e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.778336e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.688866 sec +TOTAL : 0.686013 sec INFO: No Floating Point Exceptions have been reported - 2,687,883,365 cycles # 2.879 GHz - 4,137,672,828 instructions # 1.54 insn per cycle - 0.991982760 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 + 2,680,206,326 cycles # 2.872 GHz + 4,167,068,379 instructions # 1.55 insn per cycle + 0.992521934 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.085552e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.924035e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.924035e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.093810e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.934858e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.934858e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.430840 sec +TOTAL : 3.416949 sec INFO: No Floating Point Exceptions have been reported - 10,018,256,596 cycles # 2.916 GHz - 25,507,694,274 instructions # 2.55 insn per cycle - 3.436494229 seconds time elapsed + 10,012,195,167 cycles # 2.926 GHz + 25,507,793,848 instructions # 2.55 insn per cycle + 3.422575217 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 236) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.371301e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.639987e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.639987e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.342172e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.581913e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.581913e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.073954 sec +TOTAL : 3.102371 sec INFO: No Floating Point Exceptions have been reported - 9,025,495,783 cycles # 2.931 GHz - 21,478,170,721 instructions # 2.38 insn per cycle - 3.080490687 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1100) (avx2: 0) (512y: 0) (512z: 0) + 9,123,975,305 cycles # 2.936 GHz + 21,542,843,128 instructions # 2.36 insn per cycle + 3.108003766 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1112) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.348565e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.529213e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.529213e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.389028e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.617798e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.617798e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.091062 sec +TOTAL : 3.042023 sec INFO: No Floating Point Exceptions have been reported - 8,721,037,733 cycles # 2.816 GHz - 15,901,191,500 instructions # 1.82 insn per cycle - 3.097416237 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1489) (512y: 0) (512z: 0) + 8,587,076,543 cycles # 2.818 GHz + 15,956,957,926 instructions # 1.86 insn per cycle + 3.047668407 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1497) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.428348e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.696607e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.696607e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.421436e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.692453e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.692453e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.003427 sec +TOTAL : 3.006782 sec INFO: No Floating Point Exceptions have been reported - 8,472,649,935 cycles # 2.816 GHz - 15,622,192,614 instructions # 1.84 insn per cycle - 3.009695803 seconds time elapsed + 8,445,737,284 cycles # 2.805 GHz + 15,563,019,384 instructions # 1.84 insn per cycle + 3.012659502 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1264) (512y: 141) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.053185e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.879921e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.879921e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.061400e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.904070e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.904070e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.481285 sec +TOTAL : 3.459956 sec INFO: No Floating Point Exceptions have been reported - 7,632,139,448 cycles # 2.189 GHz - 14,304,829,590 instructions # 1.87 insn per cycle - 3.488200715 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1032) (512y: 164) (512z: 877) + 7,611,248,188 cycles # 2.197 GHz + 14,286,576,836 instructions # 1.88 insn per cycle + 3.465475679 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1031) (512y: 164) (512z: 876) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index bc5233a5ba..db1ecc021d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:09:06 +DATE: 2024-09-18_12:09:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.192132e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.336696e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.300693e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.236538e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.678017e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.558515e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.585451 sec +TOTAL : 0.582271 sec INFO: No Floating Point Exceptions have been reported - 2,336,870,734 cycles # 2.880 GHz - 3,644,936,097 instructions # 1.56 insn per cycle - 0.870296260 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,326,541,752 cycles # 2.874 GHz + 3,619,452,327 instructions # 1.56 insn per cycle + 0.866579999 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.072651e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.269381e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.269381e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.078772e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.274321e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274321e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.255061 sec +TOTAL : 6.204384 sec INFO: No Floating Point Exceptions have been reported - 18,355,246,050 cycles # 2.931 GHz - 45,043,077,667 instructions # 2.45 insn per cycle - 6.263286658 seconds time elapsed + 18,271,266,571 cycles # 2.943 GHz + 45,007,026,058 instructions # 2.46 insn per cycle + 6.209806202 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.241713e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.430745e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.430745e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.258213e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.443370e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.443370e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.200925 sec +TOTAL : 3.161459 sec INFO: No Floating Point Exceptions have been reported - 9,386,491,422 cycles # 2.926 GHz - 22,329,398,339 instructions # 2.38 insn per cycle - 3.208910381 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) + 9,301,142,039 cycles # 2.938 GHz + 22,273,650,036 instructions # 2.39 insn per cycle + 3.166937253 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.404117e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 3.697492e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.697492e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.422291e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.701347e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.701347e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.006682 sec +TOTAL : 2.969313 sec INFO: No Floating Point Exceptions have been reported - 8,484,958,572 cycles # 2.815 GHz - 15,797,352,563 instructions # 1.86 insn per cycle - 3.014624816 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) + 8,389,284,998 cycles # 2.822 GHz + 15,752,357,337 instructions # 1.88 insn per cycle + 2.974718872 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.426806e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.765840e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.765840e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.405471e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.684326e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.684326e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.982618 sec +TOTAL : 2.990876 sec INFO: No Floating Point Exceptions have been reported - 8,401,165,701 cycles # 2.811 GHz - 15,653,777,374 instructions # 1.86 insn per cycle - 2.990373231 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) + 8,285,038,888 cycles # 2.766 GHz + 15,588,340,357 instructions # 1.88 insn per cycle + 2.996605246 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.426463e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.722731e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.722731e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.444926e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.740937e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.740937e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.987827 sec +TOTAL : 2.948513 sec INFO: No Floating Point Exceptions have been reported - 6,753,744,387 cycles # 2.255 GHz - 12,906,211,238 instructions # 1.91 insn per cycle - 2.995926915 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) + 6,657,028,546 cycles # 2.254 GHz + 12,863,339,645 instructions # 1.93 insn per cycle + 2.954217512 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 95b8681521..47dd15a77b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:45:42 +DATE: 2024-09-18_12:52:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.076713e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.378449e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.378449e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.148525e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.888705e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.888705e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.944337 sec +TOTAL : 1.710491 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 6,200,765,179 cycles # 2.831 GHz - 10,073,714,089 instructions # 1.62 insn per cycle - 2.274311561 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge + 5,650,857,001 cycles # 2.904 GHz + 10,226,411,017 instructions # 1.81 insn per cycle + 2.002623091 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -71,7 +71,7 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -79,35 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.013177e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.196081e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.196081e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.051342e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.236619e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.236619e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.753252 sec +TOTAL : 6.467611 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 19,140,260,721 cycles # 2.851 GHz - 45,281,984,182 instructions # 2.37 insn per cycle - 6.770979415 seconds time elapsed + 18,975,945,343 cycles # 2.932 GHz + 45,166,614,913 instructions # 2.38 insn per cycle + 6.474019296 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -115,33 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.075255e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.101362e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.101362e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.143329e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.199468e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.199468e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.587325 sec +TOTAL : 3.443303 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 10,243,661,246 cycles # 2.856 GHz - 23,736,113,257 instructions # 2.32 insn per cycle - 3.601313820 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) + 10,057,348,114 cycles # 2.916 GHz + 23,610,490,289 instructions # 2.35 insn per cycle + 3.450411330 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -149,33 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.208611e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.299964e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.299964e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.288972e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.428534e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.428534e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.407535 sec +TOTAL : 3.255640 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 9,325,104,909 cycles # 2.739 GHz - 16,992,883,294 instructions # 1.82 insn per cycle - 3.420829574 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) + 9,181,255,557 cycles # 2.815 GHz + 16,874,424,213 instructions # 1.84 insn per cycle + 3.262739708 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -183,33 +183,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.223475e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.357947e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.357947e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.308266e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.504995e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.504995e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.391442 sec +TOTAL : 3.233704 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 9,279,675,392 cycles # 2.737 GHz - 16,862,711,706 instructions # 1.82 insn per cycle - 3.405369019 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) + 9,120,657,024 cycles # 2.815 GHz + 16,716,849,319 instructions # 1.83 insn per cycle + 3.240866405 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -217,33 +217,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.268012e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.359028e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.359028e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.329690e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.465437e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.465437e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.328677 sec +TOTAL : 3.207876 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 7,543,875,114 cycles # 2.260 GHz - 14,180,005,728 instructions # 1.88 insn per cycle - 3.340981281 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) + 7,429,892,192 cycles # 2.312 GHz + 14,072,572,968 instructions # 1.89 insn per cycle + 3.215041865 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -251,8 +251,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 15fa7d3112..aa8d2ebaf9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:58:00 +DATE: 2024-09-18_13:04:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.336762e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.510278e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.479828e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.285600e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.265115e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.156209e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.311031 sec +TOTAL : 1.210437 sec INFO: No Floating Point Exceptions have been reported - 4,430,467,531 cycles # 2.889 GHz - 6,960,795,222 instructions # 1.57 insn per cycle - 1.590735457 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common + 4,156,256,455 cycles # 2.890 GHz + 6,567,216,103 instructions # 1.58 insn per cycle + 1.494886653 
seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.075223e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.270769e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270769e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.068119e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.263869e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.263869e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.687798 sec +TOTAL : 6.607111 sec INFO: No Floating Point Exceptions have been reported - 19,629,378,634 cycles # 2.933 GHz - 45,588,143,016 instructions # 2.32 insn per cycle - 6.694819566 seconds time elapsed + 19,321,982,901 cycles # 2.923 GHz + 45,195,162,918 instructions # 2.34 insn per cycle + 6.612467743 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.248280e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.433718e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.433718e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.243612e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.428241e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.428241e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.634348 sec +TOTAL : 3.520615 sec INFO: No Floating Point Exceptions have been reported - 10,674,006,930 cycles # 2.932 GHz - 22,771,305,471 instructions # 2.13 insn per cycle - 3.641620548 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) + 10,297,430,100 cycles # 2.921 GHz + 22,355,563,747 instructions # 2.17 insn per cycle + 3.526233568 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.377112e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.647798e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.647798e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.394598e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.672540e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.672540e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.480978 sec +TOTAL : 3.341420 sec INFO: No Floating Point Exceptions have been reported - 9,770,888,729 cycles # 2.802 GHz - 16,055,948,307 instructions # 1.64 insn per cycle - 3.487814115 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) + 9,418,657,206 cycles # 2.815 GHz + 15,664,231,235 instructions # 1.66 insn per cycle + 3.347085737 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.421430e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.770475e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.770475e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.438466e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.794511e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.794511e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.434581 sec +TOTAL : 3.303984 sec INFO: No Floating Point Exceptions have been reported - 9,740,474,003 cycles # 2.831 GHz - 15,722,386,015 instructions # 1.61 insn per cycle - 3.441655213 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) + 9,386,171,386 cycles # 2.837 GHz + 15,303,933,132 instructions # 1.63 insn per cycle + 3.309654062 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.441505e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.731463e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.731463e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.452484e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.752423e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.752423e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.419856 sec +TOTAL : 3.289646 sec INFO: No Floating Point Exceptions have been reported - 8,031,724,309 cycles # 2.344 GHz - 12,960,768,751 instructions # 1.61 insn per cycle - 3.427508239 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) + 7,666,750,686 cycles # 2.328 GHz + 12,574,987,911 instructions # 1.64 insn per cycle + 3.295237837 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index cd1edf8b07..b1a0ce17a0 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:55:10 +DATE: 2024-09-18_13:01:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.341129e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.656597e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.613096e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.261330e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.330186e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.320632e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.860917 sec +TOTAL : 0.870896 sec INFO: No Floating Point Exceptions have been reported - 3,166,806,232 cycles # 2.886 GHz - 6,390,531,049 instructions # 2.02 insn per cycle - 1.154020491 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst + 3,169,388,255 cycles # 2.893 GHz + 6,475,497,648 instructions # 2.04 insn per cycle + 1.152555419 
seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.067801e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.267766e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.267766e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.073605e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.268067e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.268067e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.288953 sec +TOTAL : 6.229587 sec INFO: No Floating Point Exceptions have been reported - 18,410,332,076 cycles # 2.925 GHz - 45,054,816,993 instructions # 2.45 insn per cycle - 6.296062030 seconds time elapsed + 18,291,168,737 cycles # 2.934 GHz + 45,011,612,276 instructions # 2.46 insn per cycle + 6.234779882 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.250758e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.437877e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.437877e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.256629e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.445957e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.445957e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.189367 sec +TOTAL : 3.163666 sec INFO: No Floating Point Exceptions have been reported - 9,348,605,615 cycles # 2.926 GHz - 22,330,060,575 instructions # 2.39 insn per cycle - 3.196473636 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) + 9,298,297,527 cycles # 2.935 GHz + 22,274,073,872 instructions # 2.40 insn per cycle + 3.169388774 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.398564e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.676038e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.676038e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.408682e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.697748e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.697748e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.014863 sec +TOTAL : 2.982101 sec INFO: No Floating Point Exceptions have been reported - 8,484,963,512 cycles # 2.808 GHz - 15,797,579,651 instructions # 1.86 insn per cycle - 3.022063297 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) + 8,431,325,276 cycles # 2.823 GHz + 15,754,164,098 instructions # 1.87 insn per cycle + 2.987763200 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.428672e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.765186e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.765186e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.431779e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.775747e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.775747e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.974553 sec +TOTAL : 2.959110 sec INFO: No Floating Point Exceptions have been reported - 8,386,796,916 cycles # 2.814 GHz - 15,657,891,969 instructions # 1.87 insn per cycle - 2.981555653 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) + 8,364,699,070 cycles # 2.822 GHz + 15,593,908,028 instructions # 1.86 insn per cycle + 2.964932423 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.445697e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.728486e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.728486e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.447600e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.732419e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.732419e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.962981 sec +TOTAL : 2.945776 sec INFO: No Floating Point Exceptions have been reported - 6,681,340,868 cycles # 2.251 GHz - 12,907,012,003 instructions # 1.93 insn per cycle - 2.970184514 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) + 6,653,741,333 cycles # 2.256 GHz + 12,863,983,012 instructions # 1.93 insn per cycle + 2.951493425 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index 6589d6b6fa..981ff690e7 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,60 +11,60 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:52:21 +DATE: 2024-09-18_12:59:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.003631e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.641927e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.671119e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.867533e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.208256e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.038855e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.488823 sec +TOTAL : 1.512988 sec INFO: No Floating Point Exceptions have been reported - 4,967,613,517 cycles # 2.899 GHz - 9,171,831,308 instructions # 1.85 insn per cycle - 1.769936667 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst + 5,035,172,508 cycles # 2.896 
GHz + 9,178,648,119 instructions # 1.82 insn per cycle + 1.796445964 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -72,33 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 
Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.073262e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.267974e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.267974e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.073817e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.267606e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.267606e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.250612 sec +TOTAL : 6.232302 sec INFO: No Floating Point Exceptions have been reported - 18,322,110,309 cycles # 2.929 GHz - 45,051,388,062 instructions # 2.46 insn per cycle - 6.257775360 seconds time elapsed + 18,275,461,834 cycles # 2.931 GHz + 45,008,664,367 instructions # 2.46 insn per cycle + 6.237799317 seconds time elapsed =Symbols in 
CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -106,31 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.247339e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.424409e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.424409e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.243088e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.444450e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.444450e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.190677 sec +TOTAL : 3.181850 sec INFO: No Floating Point Exceptions have been reported - 9,359,387,266 cycles # 2.928 GHz - 22,331,498,291 instructions # 2.39 insn per cycle - 3.197654484 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) + 9,350,023,781 cycles # 2.934 GHz + 22,274,333,552 instructions # 2.38 insn per cycle + 3.187507510 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -138,31 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.363785e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.679448e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.679448e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.392219e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.668104e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.668104e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.047896 sec +TOTAL : 2.999184 sec INFO: No Floating Point Exceptions have been reported - 8,583,132,130 cycles # 2.811 GHz - 15,806,350,534 instructions # 1.84 insn per cycle - 3.054826008 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) + 8,440,748,249 cycles # 2.810 GHz + 15,754,020,269 instructions # 1.87 insn per cycle + 3.004841956 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -170,31 +170,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.430638e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.771124e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.771124e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.422328e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.772097e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.772097e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.974287 sec +TOTAL : 2.968153 sec INFO: No Floating Point Exceptions have been reported - 8,401,059,167 cycles # 2.818 GHz - 15,651,581,046 instructions # 1.86 insn per cycle - 2.981875735 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) + 8,367,700,869 cycles # 2.815 GHz + 15,588,459,242 instructions # 1.86 insn per cycle + 2.973858535 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -202,31 +202,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.438697e+06 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.736996e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.736996e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.440851e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.737976e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.737976e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.973534 sec +TOTAL : 2.952558 sec INFO: No Floating Point Exceptions have been reported - 6,722,109,548 cycles # 2.256 GHz - 12,906,606,049 instructions # 1.92 insn per cycle - 2.981153680 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) + 6,664,861,082 cycles # 2.254 GHz + 12,863,872,119 instructions # 1.93 insn per cycle + 2.958126027 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -234,8 +234,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index bed528f6e7..5f8c460514 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:09:33 +DATE: 2024-09-18_12:10:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.185244e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.645179e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.802907e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.297995e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.821835e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.125593e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.581560 sec +TOTAL : 0.584822 sec INFO: No Floating Point Exceptions have been reported - 2,321,713,730 cycles # 2.869 GHz - 3,648,873,879 instructions # 1.57 insn per cycle - 0.865547071 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,340,511,556 cycles # 2.876 GHz + 3,573,310,904 instructions # 1.53 insn per cycle + 0.872056454 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.067642e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.262437e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.262437e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.074691e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.268219e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.268219e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.278398 sec +TOTAL : 6.225057 sec INFO: No Floating Point Exceptions have been reported - 18,394,901,899 cycles # 2.927 GHz - 45,013,341,285 instructions # 2.45 insn per cycle - 6.286516700 seconds time elapsed + 18,266,994,357 cycles # 2.932 GHz + 44,980,008,303 instructions # 2.46 insn per cycle + 6.230608513 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 397) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.249815e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.432877e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.432877e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.255829e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.437463e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.437463e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.188261 sec +TOTAL : 3.162961 sec INFO: No Floating Point Exceptions have been reported - 9,382,779,388 cycles # 2.937 GHz - 22,291,184,899 instructions # 2.38 insn per cycle - 3.196123670 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1939) (avx2: 0) (512y: 0) (512z: 0) + 9,315,618,309 cycles # 2.941 GHz + 22,235,168,853 instructions # 2.39 insn per cycle + 3.168519289 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1935) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.394804e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 3.683014e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.683014e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.414375e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.703911e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.703911e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.016616 sec +TOTAL : 2.977811 sec INFO: No Floating Point Exceptions have been reported - 8,501,260,075 cycles # 2.812 GHz - 15,791,303,131 instructions # 1.86 insn per cycle - 3.024850695 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2539) (512y: 0) (512z: 0) + 8,430,687,956 cycles # 2.827 GHz + 15,749,443,583 instructions # 1.87 insn per cycle + 2.983247205 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2540) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.433401e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.784502e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.784502e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.463260e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.781163e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.781163e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.973425 sec +TOTAL : 2.924999 sec INFO: No Floating Point Exceptions have been reported - 8,414,276,106 cycles # 2.823 GHz - 15,633,261,481 instructions # 1.86 insn per cycle - 2.981340876 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2436) (512y: 12) (512z: 0) + 8,268,651,321 cycles # 2.823 GHz + 15,583,986,651 instructions # 1.88 insn per cycle + 2.930392056 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2438) (512y: 10) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.449856e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.750896e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.750896e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.442819e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.745195e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.745195e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.962493 sec +TOTAL : 2.947951 sec INFO: No Floating Point Exceptions have been reported - 6,702,761,235 cycles # 2.257 GHz - 12,885,740,598 instructions # 1.92 insn per cycle - 2.970728824 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1667) (512y: 18) (512z: 1428) + 6,669,419,569 cycles # 2.259 GHz + 12,841,335,089 instructions # 1.93 insn per cycle + 2.953404356 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1669) (512y: 16) (512z: 1427) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052564145764E-002 Relative difference = 1.9988585667912256e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 711fbf3a50..438f6c4f2f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:34:45 +DATE: 2024-09-18_12:43:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.272057e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.453757e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.411368e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.248809e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.661013e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.608831e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.577320 sec +TOTAL : 0.584963 sec INFO: No Floating Point Exceptions have been reported - 2,322,367,280 cycles # 2.881 GHz - 3,616,476,077 instructions # 1.56 insn per cycle - 0.862621614 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 + 2,341,003,807 cycles # 2.871 GHz + 3,637,581,249 instructions # 1.55 insn per cycle + 0.872273356 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.635657e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.129532e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.129532e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.610499e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.089750e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.089750e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 4.218771 sec +TOTAL : 4.278573 sec INFO: No Floating Point Exceptions have been reported - 12,191,913,623 cycles # 2.887 GHz - 32,293,306,178 instructions # 2.65 insn per cycle - 4.224304323 seconds time elapsed + 12,205,449,516 cycles # 2.850 GHz + 32,295,858,353 instructions # 2.65 insn per cycle + 4.284066796 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 290) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039840314887E-002 Relative difference = 1.244813035273009e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.654215e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.464911e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.464911e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.650495e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.446725e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.446725e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.746425 sec +TOTAL : 2.750577 sec INFO: No Floating Point Exceptions have been reported - 8,013,864,577 cycles # 2.914 GHz - 18,725,751,725 instructions # 2.34 insn per cycle - 2.751635696 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1548) (avx2: 0) (512y: 0) (512z: 0) + 8,071,356,692 cycles # 2.929 GHz + 18,687,842,971 instructions # 2.32 insn per cycle + 2.756173554 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1534) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039283704129E-002 Relative difference = 5.583829420356249e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.734762e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 4.516819e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.516819e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.785833e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.615036e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.615036e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.676523 sec +TOTAL : 2.627067 sec INFO: No Floating Point Exceptions have been reported - 7,476,186,846 cycles # 2.791 GHz - 14,257,923,546 instructions # 1.91 insn per cycle - 2.682062632 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) + 7,450,918,918 cycles # 2.831 GHz + 14,249,285,643 instructions # 1.91 insn per cycle + 2.632635594 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2234) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053244447801E-002 Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.834242e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 4.778618e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.778618e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.828862e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.718189e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.718189e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.592675 sec +TOTAL : 2.597300 sec INFO: No Floating Point Exceptions have been reported - 7,344,696,907 cycles # 2.828 GHz - 13,952,931,831 instructions # 1.90 insn per cycle - 2.598198803 seconds time elapsed + 7,335,966,912 cycles # 2.820 GHz + 13,949,163,288 instructions # 1.90 insn per cycle + 2.602858413 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2087) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053244447801E-002 Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.491060e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 3.875896e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.875896e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.491639e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.833175e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.833175e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.903149 sec +TOTAL : 2.900511 sec INFO: No Floating Point Exceptions have been reported - 6,571,286,194 cycles # 2.260 GHz - 13,433,545,963 instructions # 2.04 insn per cycle - 2.908820313 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2077) (512y: 1) (512z: 1199) + 6,563,891,996 cycles # 2.259 GHz + 13,436,075,613 instructions # 2.05 insn per cycle + 2.906157600 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2073) (512y: 1) (512z: 1201) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052562326775E-002 Relative difference = 1.997440588685788e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 6fc527ffa1..2bd01da79a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:35:09 +DATE: 2024-09-18_12:43:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.289380e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.618768e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.817817e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.260194e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.691839e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.932675e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.580301 sec +TOTAL : 0.585616 sec INFO: No Floating Point Exceptions have been reported - 2,323,629,755 cycles # 2.848 GHz - 3,593,641,981 instructions # 1.55 insn per cycle - 0.873274895 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 + 2,337,485,665 cycles # 2.875 GHz + 3,652,863,320 instructions # 1.56 insn per cycle + 0.871732359 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.209448e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.220446e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.220446e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.208067e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.235106e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.235106e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.220192 sec +TOTAL : 3.219491 sec INFO: No Floating Point Exceptions have been reported - 9,366,774,041 cycles # 2.905 GHz - 25,702,432,609 instructions # 2.74 insn per cycle - 3.225730639 seconds time elapsed + 9,405,085,609 cycles # 2.917 GHz + 25,703,807,777 instructions # 2.73 insn per cycle + 3.224847546 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 243) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039838495897E-002 Relative difference = 1.2589928273811243e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.014104e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.557363e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.557363e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.972603e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.428852e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.428852e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.462960 sec +TOTAL : 2.491785 sec INFO: No Floating Point Exceptions have been reported - 7,216,847,131 cycles # 2.925 GHz - 16,891,846,951 instructions # 2.34 insn per cycle - 2.468502980 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1350) (avx2: 0) (512y: 0) (512z: 0) + 7,313,494,275 cycles # 2.930 GHz + 16,767,205,281 instructions # 2.29 insn per cycle + 2.497135576 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1311) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.924187e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 5.020326e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.020326e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.941057e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.047750e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.047750e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.522608 sec +TOTAL : 2.511970 sec INFO: No Floating Point Exceptions have been reported - 7,150,122,380 cycles # 2.829 GHz - 13,633,449,373 instructions # 1.91 insn per cycle - 2.528205937 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2061) (512y: 0) (512z: 0) + 7,127,612,921 cycles # 2.833 GHz + 13,657,719,583 instructions # 1.92 insn per cycle + 2.517264213 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2067) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053220800939E-002 Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.976818e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 5.175866e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.175866e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.994854e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.186874e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.186874e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.485267 sec +TOTAL : 2.472132 sec INFO: No Floating Point Exceptions have been reported - 7,047,642,186 cycles # 2.830 GHz - 13,442,931,038 instructions # 1.91 insn per cycle - 2.490839699 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1940) (512y: 4) (512z: 0) + 7,033,406,697 cycles # 2.840 GHz + 13,451,133,295 instructions # 1.91 insn per cycle + 2.477643200 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1935) (512y: 7) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053220800939E-002 Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.604837e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 4.103202e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.103202e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.610829e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.126124e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.126124e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.790744 sec +TOTAL : 2.783843 sec INFO: No Floating Point Exceptions have been reported - 6,349,721,778 cycles # 2.272 GHz - 13,164,680,615 instructions # 2.07 insn per cycle - 2.796235299 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2033) (512y: 1) (512z: 1085) + 6,358,284,694 cycles # 2.280 GHz + 13,173,247,957 instructions # 2.07 insn per cycle + 2.789438831 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2039) (512y: 2) (512z: 1081) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052536860923E-002 Relative difference = 1.977588895209662e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index caa67d1a4c..041f4e9efd 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:10:00 +DATE: 2024-09-18_12:10:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.610039e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.567106e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.762593e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.877042e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.647728e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.852998e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.688497 sec +TOTAL : 0.677390 sec INFO: No Floating Point Exceptions have been reported - 2,665,329,968 cycles # 2.845 GHz - 4,055,682,627 instructions # 1.52 insn per cycle - 0.995154695 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,627,954,813 cycles # 2.876 GHz + 4,055,520,615 instructions # 1.54 insn per cycle + 0.972709824 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039901590279E-002 Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.002881e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.168467e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.168467e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.020205e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.187124e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.187124e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.726627 sec +TOTAL : 6.577075 sec INFO: No Floating Point Exceptions have been reported - 19,724,365,546 cycles # 2.928 GHz - 46,388,641,620 instructions # 2.35 insn per cycle - 6.737968541 seconds time elapsed + 19,371,933,844 cycles # 2.944 GHz + 46,278,733,907 instructions # 2.39 insn per cycle + 6.582537613 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.617185e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.143896e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.143896e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.635520e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.155996e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.155996e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.347629 sec +TOTAL : 4.262104 sec INFO: No Floating Point Exceptions have been reported - 12,771,945,524 cycles # 2.931 GHz - 31,577,972,239 instructions # 2.47 insn per cycle - 4.359278192 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1719) (avx2: 0) (512y: 0) (512z: 0) + 12,531,950,606 cycles # 2.937 GHz + 31,465,132,198 instructions # 2.51 insn per cycle + 4.267832274 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1731) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.943978e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.720569e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.720569e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.976062e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.756066e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.756066e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.688531 sec +TOTAL : 3.592683 sec INFO: No Floating Point Exceptions have been reported - 10,322,037,008 cycles # 2.790 GHz - 19,570,801,424 instructions # 1.90 insn per cycle - 3.699996508 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2042) (512y: 0) (512z: 0) + 10,114,837,946 cycles # 2.812 GHz + 19,479,113,850 instructions # 1.93 insn per cycle + 3.598394582 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2045) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.986657e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.789520e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.789520e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.011048e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.815376e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.815376e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.618212 sec +TOTAL : 3.535103 sec INFO: No Floating Point Exceptions have been reported - 10,149,499,266 cycles # 2.797 GHz - 19,312,096,557 instructions # 1.90 insn per cycle - 3.629679706 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1785) (512y: 189) (512z: 0) + 9,996,837,570 cycles # 2.824 GHz + 19,291,566,393 instructions # 1.93 insn per cycle + 3.540686440 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1799) (512y: 188) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.777391e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.385382e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.385382e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.782393e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.383775e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.383775e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.992684 sec +TOTAL : 3.939537 sec INFO: No Floating Point Exceptions have been reported - 8,588,251,503 cycles # 2.146 GHz - 15,161,251,122 instructions # 1.77 insn per cycle - 4.003805537 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 954) (512y: 154) (512z: 1322) + 8,379,017,732 cycles # 2.125 GHz + 15,108,594,606 instructions # 1.80 insn per cycle + 3.945372714 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 966) (512y: 154) (512z: 1330) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index ce1b16067d..63e5511d98 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-15_11:10:30 +DATE: 2024-09-18_12:11:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.695377e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.640031e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.828039e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.941580e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.659467e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.829628e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.676129 sec +TOTAL : 0.674748 sec INFO: No Floating Point Exceptions have been reported - 2,632,225,960 cycles # 2.883 GHz - 4,132,384,248 instructions # 1.57 insn per cycle - 0.970493332 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,621,919,128 cycles # 2.880 GHz + 4,081,332,751 instructions # 1.56 insn per cycle + 0.969735396 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039901590279E-002 Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.005676e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.176899e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.176899e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.022324e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.188868e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.188868e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.735465 sec +TOTAL : 6.564992 sec INFO: No Floating Point Exceptions have been reported - 19,720,225,593 cycles # 2.924 GHz - 46,326,489,596 instructions # 2.35 insn per cycle - 6.746197968 seconds time elapsed + 19,266,332,416 cycles # 2.933 GHz + 46,212,690,278 instructions # 2.40 insn per cycle + 6.570664425 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.574820e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.144081e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.144081e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.631635e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.147723e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.147723e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.449866 sec +TOTAL : 4.273260 sec INFO: No Floating Point Exceptions have been reported - 13,065,779,841 cycles # 2.930 GHz - 31,555,443,434 instructions # 2.42 insn per cycle - 4.460852067 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1711) (avx2: 0) (512y: 0) (512z: 0) + 12,565,193,084 cycles # 2.937 GHz + 31,464,303,429 instructions # 2.50 insn per cycle + 4.278983280 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1724) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.952135e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.730440e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.730440e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.965569e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.737055e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.737055e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.677079 sec +TOTAL : 3.608853 sec INFO: No Floating Point Exceptions have been reported - 10,320,566,663 cycles # 2.800 GHz - 19,557,785,526 instructions # 1.90 insn per cycle - 3.688245631 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2026) (512y: 0) (512z: 0) + 10,149,451,908 cycles # 2.809 GHz + 19,494,245,478 instructions # 1.92 insn per cycle + 3.614638314 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2036) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.981919e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.782784e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.782784e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.020584e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.826510e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.826510e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.626089 sec +TOTAL : 3.523442 sec INFO: No Floating Point Exceptions have been reported - 10,150,645,903 cycles # 2.793 GHz - 19,388,040,023 instructions # 1.91 insn per cycle - 3.637342012 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1779) (512y: 189) (512z: 0) + 9,922,226,767 cycles # 2.813 GHz + 19,194,396,105 instructions # 1.93 insn per cycle + 3.529032291 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1766) (512y: 191) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.806136e+06 ) sec^-1 -EvtsPerSec[MatrixElems] 
(3) = ( 2.449559e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.449559e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.850816e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.505094e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.505094e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.931921 sec +TOTAL : 3.808968 sec INFO: No Floating Point Exceptions have been reported - 8,442,748,276 cycles # 2.150 GHz - 15,068,523,446 instructions # 1.78 insn per cycle - 3.943167549 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 947) (512y: 156) (512z: 1306) + 8,221,926,837 cycles # 2.156 GHz + 14,966,457,412 instructions # 1.82 insn per cycle + 3.814643788 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 959) (512y: 155) (512z: 1296) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index aeadfaae64..d77862b8c7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:11:01 +DATE: 2024-09-18_12:11:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.391981e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.330443e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.949573e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.432691e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.350673e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001727e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.534465 sec +TOTAL : 0.536474 sec INFO: No Floating Point Exceptions have been reported - 2,210,485,527 cycles # 2.869 GHz - 3,136,829,588 instructions # 1.42 insn per cycle - 0.828432932 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,210,506,804 cycles # 2.873 GHz + 3,172,337,100 instructions # 1.44 insn per cycle + 0.829286366 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.818281e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.865529e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.865529e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.830003e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.876741e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.876741e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.911600 sec +TOTAL : 5.837147 sec INFO: No Floating Point Exceptions have been reported - 17,389,649,504 cycles # 2.935 GHz - 46,036,709,188 instructions # 2.65 insn per cycle - 5.925127688 seconds time elapsed + 17,232,906,357 cycles # 2.950 GHz + 45,930,941,627 instructions # 2.67 insn per cycle + 5.842851386 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.165855e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.325075e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.325075e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.213968e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.373677e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.373677e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.453922 sec +TOTAL : 3.369805 sec INFO: No Floating Point Exceptions have been reported - 10,171,046,914 cycles # 2.936 GHz - 27,937,548,503 instructions # 2.75 insn per cycle - 3.465600263 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) + 9,944,028,092 cycles # 2.947 GHz + 27,848,243,801 instructions # 2.80 insn per cycle + 3.375396234 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.967598e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.358232e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.358232e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.005348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.393032e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.393032e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.252729 sec +TOTAL : 2.203017 sec INFO: No Floating Point Exceptions have been reported - 6,219,848,194 cycles # 2.748 GHz - 12,677,070,824 instructions # 2.04 insn per cycle - 2.263945260 seconds time elapsed + 6,092,356,881 cycles # 2.759 GHz + 12,580,147,933 instructions # 2.06 insn per cycle + 2.208781826 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.478036e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.948706e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.948706e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.533405e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.010418e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.010418e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.055945 sec +TOTAL : 2.002945 sec INFO: No Floating Point Exceptions have been reported - 5,693,440,562 cycles # 2.756 GHz - 12,116,317,958 instructions # 2.13 insn per cycle - 2.067013514 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) + 5,570,120,100 cycles # 2.774 GHz + 12,019,792,186 instructions # 2.16 insn per cycle + 2.008867487 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.483396e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.667202e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.667202e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.539179e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.725857e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.725857e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.151048 sec +TOTAL : 3.069457 sec INFO: No Floating Point Exceptions have been reported - 5,836,401,977 cycles # 1.846 GHz - 8,391,475,751 instructions # 1.44 insn per cycle - 3.162234928 seconds time elapsed + 5,709,813,977 cycles # 1.857 GHz + 8,292,916,903 instructions # 1.45 insn per cycle + 3.075340516 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 9022013b0c..ac7eb7abb8 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:46:12 +DATE: 2024-09-18_12:52:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.381047e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.782856e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.782856e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.492890e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.985153e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.985153e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.840023 sec +TOTAL : 0.825573 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,108,426,192 cycles # 2.861 GHz - 4,770,924,698 instructions # 1.53 insn per cycle - 1.146594198 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge + 3,089,630,348 cycles # 2.884 GHz + 4,704,003,879 instructions # 1.52 insn per cycle + 1.129956624 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -71,7 +71,7 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -79,35 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.806159e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.852377e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.852377e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.810470e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.856841e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.856841e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 6.067320 sec +TOTAL : 5.979891 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 17,856,813,573 cycles # 2.936 GHz - 46,243,571,751 instructions # 2.59 insn per cycle - 6.083398130 seconds time elapsed + 17,636,224,981 cycles # 2.947 GHz + 46,002,491,255 instructions # 2.61 insn per cycle + 5.986641580 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -115,33 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.134236e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.289007e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.289007e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.162709e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.318081e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.318081e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.611222 sec +TOTAL : 3.508343 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 10,640,342,598 cycles # 2.934 GHz - 28,274,377,614 instructions # 2.66 insn per cycle - 3.627839941 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) + 10,301,947,786 cycles # 2.931 GHz + 28,031,926,381 instructions # 2.72 insn per cycle + 3.516023780 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -149,33 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.918119e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.295386e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.295386e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.911481e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.286736e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.286736e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.398079 sec +TOTAL : 2.328474 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 6,685,318,792 cycles # 2.770 GHz - 13,122,453,026 instructions # 1.96 insn per cycle - 2.414568983 seconds time elapsed + 6,480,879,664 cycles # 2.775 GHz + 12,869,228,758 instructions # 1.99 insn per cycle + 2.336129053 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -183,33 +183,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.370026e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.821205e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.821205e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.391900e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.844641e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.844641e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.218649 sec +TOTAL : 2.137350 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 6,189,711,182 cycles # 2.770 GHz - 12,557,371,407 instructions # 2.03 insn per cycle - 2.235322482 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) + 5,935,186,233 cycles # 2.768 GHz + 12,309,185,637 instructions # 2.07 insn per cycle + 2.144981542 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -217,33 +217,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.469847e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.650575e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.650575e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.478793e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.660239e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.660239e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.290950 sec +TOTAL : 3.208412 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 6,320,487,640 cycles # 1.913 GHz - 8,791,643,966 instructions # 1.39 insn per cycle - 3.307886654 seconds time elapsed + 6,086,695,352 cycles # 1.893 GHz + 8,539,357,346 instructions # 1.40 insn per cycle + 3.215882461 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -251,8 +251,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 85f95aac4c..43a1422029 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:58:30 +DATE: 2024-09-18_13:05:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.424217e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.466051e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.011391e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.294862e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.316742e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.978216e+07 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.646400 sec +TOTAL : 0.632097 sec INFO: No Floating Point Exceptions have been reported - 2,528,248,856 cycles # 2.879 GHz - 3,688,196,917 instructions # 1.46 insn per cycle - 0.934986871 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common + 2,509,027,611 cycles # 2.881 GHz + 3,623,648,413 instructions # 1.44 insn per cycle + 0.928416005 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.819736e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.867152e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.867152e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.823170e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.870043e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.870043e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.985044 sec +TOTAL : 5.922002 sec INFO: No Floating Point Exceptions have been reported - 17,600,111,411 cycles # 2.936 GHz - 46,124,554,790 instructions # 2.62 insn per cycle - 5.995774241 seconds time elapsed + 17,445,049,338 cycles # 2.943 GHz + 45,950,504,380 instructions # 2.63 insn per cycle + 5.927754556 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.168097e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.326675e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.326675e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.206424e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.368310e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.368310e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.533047 sec +TOTAL : 3.440040 sec INFO: No Floating Point Exceptions have been reported - 10,405,596,477 cycles # 2.937 GHz - 28,016,084,485 instructions # 2.69 insn per cycle - 3.543840924 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) + 10,134,263,801 cycles # 2.942 GHz + 27,846,437,463 instructions # 2.75 insn per cycle + 3.446069209 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.000915e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.392912e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.392912e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.972916e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.355947e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.355947e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.319991 sec +TOTAL : 2.278924 sec INFO: No Floating Point Exceptions have been reported - 6,443,899,577 cycles # 2.766 GHz - 12,743,367,354 instructions # 1.98 insn per cycle - 2.330731733 seconds time elapsed + 6,293,574,887 cycles # 2.756 GHz + 12,563,410,868 instructions # 2.00 insn per cycle + 2.284852020 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.453355e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.917864e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.917864e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.484228e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.952695e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.952695e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.148765 sec +TOTAL : 2.082886 sec INFO: No Floating Point Exceptions have been reported - 5,965,699,512 cycles # 2.764 GHz - 12,146,978,501 instructions # 2.04 insn per cycle - 2.159407986 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) + 5,796,540,715 cycles # 2.776 GHz + 11,970,685,605 instructions # 2.07 insn per cycle + 2.088838177 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.499391e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.683844e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.683844e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.533537e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.719277e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.719277e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.221377 sec +TOTAL : 3.139756 sec INFO: No Floating Point Exceptions have been reported - 6,080,803,082 cycles # 1.882 GHz - 8,423,087,351 instructions # 1.39 insn per cycle - 3.232264502 seconds time elapsed + 5,897,468,368 cycles # 1.875 GHz + 8,242,833,828 instructions # 1.40 insn per cycle + 3.145931095 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index a207bf6969..4d0fbacd91 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:55:37 +DATE: 2024-09-18_13:02:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.567312e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.455238e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.007252e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.273490e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.332144e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.976419e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.563061 sec +TOTAL : 0.567405 sec INFO: No Floating Point Exceptions have been reported - 2,285,403,688 cycles # 2.879 GHz - 3,587,140,832 instructions # 1.57 insn per cycle - 0.850548380 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst + 2,317,717,097 cycles # 2.884 GHz + 3,603,757,599 instructions # 1.55 insn per cycle + 0.860500177 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.818247e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.865618e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.865618e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.822880e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.870340e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.870340e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.910910 sec +TOTAL : 5.861471 sec INFO: No Floating Point Exceptions have been reported - 17,366,644,102 cycles # 2.933 GHz - 46,048,599,584 instructions # 2.65 insn per cycle - 5.921509733 seconds time elapsed + 17,231,534,650 cycles # 2.937 GHz + 45,932,602,629 instructions # 2.67 insn per cycle + 5.867633947 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.165029e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.323362e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.323362e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.195694e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.355589e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.355589e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.462860 sec +TOTAL : 3.390529 sec INFO: No Floating Point Exceptions have been reported - 10,189,100,613 cycles # 2.933 GHz - 27,958,354,355 instructions # 2.74 insn per cycle - 3.474989469 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) + 9,982,800,981 cycles # 2.940 GHz + 27,850,403,920 instructions # 2.79 insn per cycle + 3.396205784 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.979916e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.371493e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.371493e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.027954e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.417032e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.417032e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.251622 sec +TOTAL : 2.193317 sec INFO: No Floating Point Exceptions have been reported - 6,239,671,348 cycles # 2.759 GHz - 12,698,985,692 instructions # 2.04 insn per cycle - 2.262379977 seconds time elapsed + 6,072,828,383 cycles # 2.762 GHz + 12,580,752,660 instructions # 2.07 insn per cycle + 2.199291134 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.471204e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.935728e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.935728e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.459397e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.922046e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.922046e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.061624 sec +TOTAL : 2.029275 sec INFO: No Floating Point Exceptions have been reported - 5,721,552,525 cycles # 2.762 GHz - 12,134,935,244 instructions # 2.12 insn per cycle - 2.072280515 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) + 5,590,179,855 cycles # 2.748 GHz + 12,020,448,396 instructions # 2.15 insn per cycle + 2.035272328 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.500472e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.684328e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.684328e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.512487e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.697486e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.697486e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.142449 sec +TOTAL : 3.093387 sec INFO: No Floating Point Exceptions have been reported - 5,837,185,756 cycles # 1.853 GHz - 8,411,639,410 instructions # 1.44 insn per cycle - 3.153001098 seconds time elapsed + 5,720,535,090 cycles # 1.846 GHz + 8,293,713,066 instructions # 1.45 insn per cycle + 3.099385086 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index fa08fbada3..06cd2419c8 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,60 +11,60 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:52:49 +DATE: 2024-09-18_12:59:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.726431e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.401767e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.003069e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.799214e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.349130e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.974876e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.719407 sec +TOTAL : 0.720471 sec INFO: No Floating Point Exceptions have been reported - 2,718,962,196 cycles # 2.853 GHz - 4,261,744,999 instructions # 1.57 insn per cycle - 1.009279183 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst + 2,751,096,444 cycles # 2.885 GHz + 4,339,626,159 instructions # 1.58 insn per cycle + 1.011336555 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -72,33 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.804236e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.851320e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.851320e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.816637e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.863801e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.863801e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.958188 sec +TOTAL : 5.879265 sec INFO: No Floating Point Exceptions have been reported - 17,386,557,790 cycles # 2.923 GHz - 46,053,036,463 instructions # 2.65 insn per cycle - 5.968882862 seconds time elapsed + 17,261,479,917 cycles # 2.934 GHz + 45,935,121,768 instructions # 2.66 insn per cycle + 5.884988360 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -106,31 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.150158e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.308857e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.308857e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.202828e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.362707e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.362707e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.477756 sec +TOTAL : 3.382024 sec INFO: No Floating Point Exceptions have been reported - 10,179,732,087 cycles # 2.919 GHz - 27,956,952,229 instructions # 2.75 insn per cycle - 3.488776572 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) + 9,945,427,320 cycles # 2.936 GHz + 27,847,352,314 instructions # 2.80 insn per cycle + 3.387994978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -138,31 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.931728e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.313640e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.313640e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.949448e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.331919e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.331919e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.272950 sec +TOTAL : 2.228024 sec INFO: No Floating Point Exceptions have been reported - 6,250,280,118 cycles # 2.738 GHz - 12,699,256,189 instructions # 2.03 insn per cycle - 2.283692354 seconds time elapsed + 6,117,137,090 cycles # 2.739 GHz + 12,580,569,234 instructions # 2.06 insn per cycle + 2.234097878 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -170,31 +170,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.454076e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.923276e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.923276e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.342003e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.785664e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.785664e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.069149 sec +TOTAL : 2.072519 sec INFO: No Floating Point Exceptions have been reported - 5,725,799,884 cycles # 2.754 GHz - 12,135,179,967 instructions # 2.12 insn per cycle - 2.079783939 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) + 5,591,470,515 cycles # 2.691 GHz + 12,020,476,993 instructions # 2.15 insn per cycle + 2.078517041 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -202,31 +202,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.414550e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.592343e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.592343e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.530876e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.717281e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.717281e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.218110 sec +TOTAL : 3.077997 sec INFO: No Floating Point Exceptions have been reported - 5,959,079,250 cycles # 1.847 GHz - 8,422,189,176 instructions # 1.41 insn per cycle - 3.228977501 seconds time elapsed + 5,702,073,376 cycles # 1.850 GHz + 8,294,780,221 instructions # 1.45 insn per cycle + 3.083993360 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -234,8 +234,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 328467ef63..a4f203143e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:11:27 +DATE: 2024-09-18_12:11:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.368553e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.328924e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.965685e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.820817e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.978279e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.339111e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.531897 sec +TOTAL : 0.700005 sec INFO: No Floating Point Exceptions have been reported - 2,204,349,816 cycles # 2.864 GHz - 3,169,634,690 instructions # 1.44 insn per cycle - 0.825649601 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,762,648,255 cycles # 2.857 GHz + 3,086,101,973 instructions # 1.12 insn per cycle + 1.026825767 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.870268e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.919754e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.919754e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.875216e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.924982e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.924982e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.735202 sec +TOTAL : 5.700425 sec INFO: No Floating Point Exceptions have been reported - 16,847,713,247 cycles # 2.933 GHz - 44,981,738,957 instructions # 2.67 insn per cycle - 5.744748484 seconds time elapsed + 16,757,702,666 cycles # 2.937 GHz + 44,923,641,547 instructions # 2.68 insn per cycle + 5.706326125 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.300705e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.473658e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.473658e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.370762e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.546946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.546946e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.304655 sec +TOTAL : 3.219225 sec INFO: No Floating Point Exceptions have been reported - 9,659,083,497 cycles # 2.916 GHz - 26,749,720,361 instructions # 2.77 insn per cycle - 3.314062418 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2328) (avx2: 0) (512y: 0) (512z: 0) + 9,494,791,570 cycles # 2.945 GHz + 26,687,379,503 instructions # 2.81 insn per cycle + 3.225069589 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2327) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.583161e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.907950e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.907950e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.607569e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.929909e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.929909e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.415079 sec +TOTAL : 2.383333 sec INFO: No Floating Point Exceptions have been reported - 6,675,336,151 cycles # 2.753 GHz - 14,174,925,457 instructions # 2.12 insn per cycle - 2.425332683 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2710) (512y: 0) (512z: 0) + 6,604,949,302 cycles # 2.766 GHz + 14,119,001,234 instructions # 2.14 insn per cycle + 2.388928721 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2711) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.674600e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.017717e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.017717e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.803756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.157173e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.157173e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.378233 sec +TOTAL : 2.289999 sec INFO: No Floating Point Exceptions have been reported - 6,574,155,578 cycles # 2.754 GHz - 13,789,180,928 instructions # 2.10 insn per cycle - 2.388565062 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2356) (512y: 297) (512z: 0) + 6,348,634,731 cycles # 2.767 GHz + 13,715,767,912 instructions # 2.16 insn per cycle + 2.295499005 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 298) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.383516e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.554686e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.554686e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.387276e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.557456e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.557456e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.229855 sec +TOTAL : 3.203296 sec INFO: No Floating Point Exceptions have been reported - 5,994,389,719 cycles # 1.851 GHz - 10,123,629,860 instructions # 1.69 insn per cycle - 3.240029027 seconds time elapsed + 5,911,433,799 cycles # 1.843 GHz + 10,058,967,230 instructions # 1.70 insn per cycle + 3.209029605 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1273) (512y: 208) (512z: 1988) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 2da881e2b3..797e37fdb1 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:35:31 +DATE: 2024-09-18_12:43:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.302471e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.316570e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.001729e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.310192e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.359217e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.986325e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.535501 sec +TOTAL : 0.536645 sec INFO: No Floating Point Exceptions have been reported - 2,215,078,191 cycles # 2.874 GHz - 3,154,679,095 instructions # 1.42 insn per cycle - 0.829146554 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 + 2,216,199,851 cycles # 2.870 GHz + 3,159,776,582 instructions # 1.43 insn per cycle + 0.831121874 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.351910e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.430033e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.430033e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.421869e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.505515e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.505515e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.571525 sec +TOTAL : 4.438528 sec INFO: No Floating Point Exceptions have been reported - 13,035,401,690 cycles # 2.848 GHz - 34,355,905,973 instructions # 2.64 insn per cycle - 4.578322526 seconds time elapsed + 13,015,204,187 cycles # 2.929 GHz + 34,341,759,533 instructions # 2.64 insn per cycle + 4.444441151 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.974205e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.110138e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.110138e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.982901e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.119934e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.119934e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.639722 sec +TOTAL : 3.624625 sec INFO: No Floating Point Exceptions have been reported - 10,720,308,622 cycles # 2.941 GHz - 24,027,850,859 instructions # 2.24 insn per cycle - 3.646936507 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) + 10,679,803,279 cycles # 2.942 GHz + 24,245,188,333 instructions # 2.27 insn per cycle + 3.630600501 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2610) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.622097e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.949479e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.949479e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.555816e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.876140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.876140e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.385273 sec +TOTAL : 2.410266 sec INFO: No Floating Point Exceptions have been reported - 6,607,425,584 cycles # 2.762 GHz - 12,368,604,074 instructions # 1.87 insn per cycle - 2.392729796 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3103) (512y: 0) (512z: 0) + 6,676,895,845 cycles # 2.765 GHz + 12,404,391,789 instructions # 1.86 insn per cycle + 2.415872101 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3115) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.850394e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.302393e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.302393e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.932497e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.306284e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.306284e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.279711 sec +TOTAL : 2.233356 sec INFO: No Floating Point Exceptions have been reported - 6,291,615,830 cycles # 2.752 GHz - 11,595,311,145 instructions # 1.84 insn per cycle - 2.287442889 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2648) (512y: 239) (512z: 0) + 6,172,218,152 cycles # 2.758 GHz + 11,544,853,425 instructions # 1.87 insn per cycle + 2.239017897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2644) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.743982e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.952525e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.952525e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.760390e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.970863e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.970863e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.921108 sec +TOTAL : 2.895519 sec INFO: No Floating Point Exceptions have been reported - 5,423,773,794 cycles # 1.852 GHz - 9,310,782,229 instructions # 1.72 insn per cycle - 2.929084188 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2096) (512y: 282) (512z: 1955) + 5,386,476,820 cycles # 1.857 GHz + 9,291,001,680 instructions # 1.72 insn per cycle + 2.901312030 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2099) (512y: 282) (512z: 1958) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 86df224c90..af0c8fa098 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:35:55 +DATE: 2024-09-18_12:44:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.255241e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.245059e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.949681e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.294016e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.195619e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.822974e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.533151 sec +TOTAL : 0.537359 sec INFO: No Floating Point Exceptions have been reported - 2,204,451,822 cycles # 2.864 GHz - 3,124,206,943 instructions # 1.42 insn per cycle - 0.826948420 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 + 2,212,895,393 cycles # 2.861 GHz + 3,167,520,059 instructions # 1.43 insn per cycle + 0.832101772 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.566402e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.659710e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.659710e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.565164e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.657330e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.657330e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.199423 sec +TOTAL : 4.194388 sec INFO: No Floating Point Exceptions have been reported - 12,339,181,649 cycles # 2.934 GHz - 34,922,451,175 instructions # 2.83 insn per cycle - 4.206438418 seconds time elapsed + 12,320,787,698 cycles # 2.934 GHz + 34,912,998,062 instructions # 2.83 insn per cycle + 4.200192046 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 430) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.977481e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.113067e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.113067e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.989812e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.127480e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.127480e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.637534 sec +TOTAL : 3.616035 sec INFO: No Floating Point Exceptions have been reported - 10,710,920,290 cycles # 2.939 GHz - 23,032,620,692 instructions # 2.15 insn per cycle - 3.644897421 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2340) (avx2: 0) (512y: 0) (512z: 0) + 10,626,604,482 cycles # 2.935 GHz + 23,338,496,545 instructions # 2.20 insn per cycle + 3.621790672 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2378) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.909385e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.279214e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.279214e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.054894e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.447738e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.447738e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.252873 sec +TOTAL : 2.181056 sec INFO: No Floating Point Exceptions have been reported - 6,212,002,997 cycles # 2.749 GHz - 11,978,645,016 instructions # 1.93 insn per cycle - 2.260347594 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2491) (512y: 0) (512z: 0) + 6,051,059,717 cycles # 2.768 GHz + 11,860,809,289 instructions # 1.96 insn per cycle + 2.186772408 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2468) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.039032e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.423530e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.423530e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.028106e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.414371e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.414371e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.196002 sec +TOTAL : 2.192869 sec INFO: No Floating Point Exceptions have been reported - 6,062,556,643 cycles # 2.753 GHz - 11,146,456,018 instructions # 1.84 insn per cycle - 2.203425956 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2103) (512y: 174) (512z: 0) + 6,064,121,206 cycles # 2.759 GHz + 11,098,432,522 instructions # 1.83 insn per cycle + 2.198761953 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2098) (512y: 174) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.871031e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.091641e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.091641e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.876416e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.107845e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.107845e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.826553 sec +TOTAL : 2.813537 sec INFO: No Floating Point Exceptions have been reported - 5,265,704,314 cycles # 1.859 GHz - 9,046,022,125 instructions # 1.72 insn per cycle - 2.834187629 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1632) (512y: 208) (512z: 1571) + 5,237,838,464 cycles # 1.858 GHz + 9,015,066,552 instructions # 1.72 insn per cycle + 2.819357375 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1632) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index e1d11759a7..0cce370026 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:11:52 +DATE: 2024-09-18_12:12:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.179768e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.708203e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.827426e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.285654e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.744544e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.855248e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.491224 sec +TOTAL : 0.489810 sec INFO: No Floating Point Exceptions have been reported - 2,056,714,115 cycles # 2.865 GHz - 2,916,773,309 instructions # 1.42 insn per cycle - 0.776029796 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,058,086,051 cycles # 2.871 GHz + 2,937,778,801 instructions # 1.43 insn per cycle + 0.774500335 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.918838e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.972832e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.972832e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.924099e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.978298e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.978298e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.552302 sec +TOTAL : 5.535123 sec INFO: No Floating Point Exceptions have been reported - 16,247,282,670 cycles # 2.924 GHz - 45,328,928,537 instructions # 2.79 insn per cycle - 5.557963082 seconds time elapsed + 16,260,554,497 cycles # 2.935 GHz + 45,332,637,380 instructions # 2.79 insn per cycle + 5.540566072 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.529514e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.866293e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.866293e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.537932e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.874791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.874791e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.402922 sec +TOTAL : 2.397466 sec INFO: No Floating Point Exceptions have been reported - 7,055,912,070 cycles # 2.931 GHz - 17,768,218,222 instructions # 2.52 insn per cycle - 2.408607319 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) + 7,088,165,806 cycles # 2.951 GHz + 17,790,594,363 instructions # 2.51 insn per cycle + 2.403188687 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.299822e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.410195e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.410195e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.392634e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.540507e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.540507e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.350007 sec +TOTAL : 1.334609 sec INFO: No Floating Point Exceptions have been reported - 3,747,874,250 cycles # 2.767 GHz - 8,260,976,747 instructions # 2.20 insn per cycle - 1.355686963 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) + 3,736,094,091 cycles # 2.789 GHz + 8,261,313,611 instructions # 2.21 insn per cycle + 1.340132908 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.794382e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.005480e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.005480e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.862239e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.012505e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.012505e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.278305 sec +TOTAL : 1.268798 sec INFO: No Floating Point Exceptions have been reported - 3,550,706,297 cycles # 2.767 GHz - 7,915,681,558 instructions # 2.23 insn per cycle - 1.284036639 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) + 3,543,869,427 cycles # 2.783 GHz + 7,911,503,214 instructions # 2.23 insn per cycle + 1.274261347 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.489307e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.134354e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 7.134354e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.491068e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.141806e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.141806e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.705925 sec +TOTAL : 1.701721 sec INFO: No Floating Point Exceptions have been reported - 3,272,576,419 cycles # 1.913 GHz - 6,103,138,487 instructions # 1.86 insn per cycle - 1.712010321 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) + 3,270,419,298 cycles # 1.917 GHz + 6,095,745,028 instructions # 1.86 insn per cycle + 1.707211646 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 4785fec175..5e7502fc17 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:46:39 +DATE: 2024-09-18_12:53:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.985126e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.401873e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.401873e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.022210e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.414163e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.414163e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.682896 sec +TOTAL : 0.683925 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,622,267,762 cycles # 2.874 GHz - 4,057,326,622 instructions # 1.55 insn per cycle - 0.970912293 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge + 2,644,974,332 cycles # 2.886 GHz + 4,089,078,726 instructions # 1.55 insn per cycle + 0.974029218 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -71,7 +71,7 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -79,35 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.900194e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.953379e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.953379e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.927229e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.981708e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.981708e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.681942 sec +TOTAL : 5.572760 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 16,671,667,536 cycles # 2.929 GHz - 45,497,192,820 instructions # 2.73 insn per cycle - 5.692941265 seconds time elapsed + 16,435,796,229 cycles # 2.946 GHz + 45,376,812,282 instructions # 2.76 insn per cycle + 5.580128034 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -115,33 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.487658e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.829841e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.829841e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.483217e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.814609e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.814609e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.506047 sec +TOTAL : 2.475211 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 7,392,710,973 cycles # 2.938 GHz - 18,162,302,699 instructions # 2.46 insn per cycle - 2.517172183 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) + 7,297,746,086 cycles # 2.941 GHz + 18,073,033,530 instructions # 2.48 insn per cycle + 2.482430942 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -149,33 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.160433e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.262111e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.262111e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.199525e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.300829e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.300829e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.454756 sec +TOTAL : 1.415446 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,077,667,192 cycles # 2.783 GHz - 8,611,395,195 instructions # 2.11 insn per cycle - 1.466232607 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) + 3,953,896,804 cycles # 2.781 GHz + 8,500,905,843 instructions # 2.15 insn per cycle + 1.422523843 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -183,33 +183,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.644244e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.901065e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.901065e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.608107e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.919736e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.919736e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.382676 sec +TOTAL : 1.364816 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,878,507,500 cycles # 2.784 GHz - 8,265,873,907 instructions # 2.13 insn per cycle - 1.393862906 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) + 3,828,677,143 cycles # 2.793 GHz + 8,155,232,689 instructions # 2.13 insn per cycle + 1.371531073 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -217,33 +217,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.387022e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.022226e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.022226e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.398900e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.033073e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.033073e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.814510 sec +TOTAL : 1.777483 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,594,832,163 cycles # 1.970 GHz - 6,462,220,806 instructions # 1.80 insn per cycle - 1.825958297 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) + 3,485,580,348 cycles # 1.954 GHz + 6,352,386,091 instructions # 1.82 insn per cycle + 1.784705241 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -251,8 +251,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index ff1a0d1a39..7b3bdcf221 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:58:55 +DATE: 2024-09-18_13:05:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.402978e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.823028e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.958083e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.256953e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.707995e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.827629e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.595970 sec +TOTAL : 0.581626 sec INFO: No Floating Point Exceptions have been reported - 2,351,354,548 cycles # 2.876 GHz - 3,444,506,671 instructions # 1.46 insn per cycle - 0.875001238 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common + 2,320,591,922 cycles # 2.873 GHz + 3,370,044,879 instructions # 1.45 insn per cycle + 0.865525838 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.915967e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.970379e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.970379e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.923451e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.977569e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.977569e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 5.649395 sec +TOTAL : 5.596105 sec INFO: No Floating Point Exceptions have been reported - 16,580,294,518 cycles # 2.931 GHz - 45,471,304,380 instructions # 2.74 insn per cycle - 5.657307639 seconds time elapsed + 16,423,082,806 cycles # 2.932 GHz + 45,361,162,230 instructions # 2.76 insn per cycle + 5.601871750 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.539990e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.880360e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.880360e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.510624e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.845954e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.845954e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.485589 sec +TOTAL : 2.467999 sec INFO: No Floating Point Exceptions have been reported - 7,332,359,110 cycles # 2.941 GHz - 17,888,678,821 instructions # 2.44 insn per cycle - 2.493593708 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) + 7,259,263,758 cycles # 2.936 GHz + 17,804,964,488 instructions # 2.45 insn per cycle + 2.473643333 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.267444e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.414148e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.414148e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.271097e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.411462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.411462e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.445011 sec +TOTAL : 1.412437 sec INFO: No Floating Point Exceptions have been reported - 4,026,136,405 cycles # 2.773 GHz - 8,355,233,205 instructions # 2.08 insn per cycle - 1.452615562 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) + 3,908,301,423 cycles # 2.757 GHz + 8,246,550,739 instructions # 2.11 insn per cycle + 1.418307229 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.729960e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.002290e+06 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.002290e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.660773e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.926852e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.926852e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.378766 sec +TOTAL : 1.356729 sec INFO: No Floating Point Exceptions have been reported - 3,842,420,700 cycles # 2.773 GHz - 7,976,068,469 instructions # 2.08 insn per cycle - 1.386498519 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) + 3,755,878,291 cycles # 2.759 GHz + 7,864,539,547 instructions # 2.09 insn per cycle + 1.362169016 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.464492e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.118413e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.118413e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.442823e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.089629e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.089629e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.801449 sec +TOTAL : 1.773182 sec INFO: No Floating Point Exceptions have been reported - 3,544,493,993 cycles # 1.960 GHz - 6,155,712,678 instructions # 1.74 insn per cycle - 1.809068044 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) + 3,435,797,893 cycles # 1.932 GHz + 6,046,565,657 instructions # 1.76 insn per cycle + 1.778888357 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index d6446e7404..423fac7e32 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:56:02 +DATE: 2024-09-18_13:02:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.563506e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.776392e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.890773e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.120397e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.694282e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.817714e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.519623 sec +TOTAL : 0.524521 sec INFO: No Floating Point Exceptions have been reported - 2,142,971,519 cycles # 2.865 GHz - 3,298,601,891 instructions # 1.54 insn per cycle - 0.804804703 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst + 2,152,741,046 cycles # 2.863 GHz + 3,343,842,138 instructions # 1.55 insn per cycle + 0.808879979 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.921389e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.976206e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.976206e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.913977e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.967735e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.967735e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.559284 sec +TOTAL : 5.564436 sec INFO: No Floating Point Exceptions have been reported - 16,314,564,993 cycles # 2.932 GHz - 45,379,598,894 instructions # 2.78 insn per cycle - 5.566847007 seconds time elapsed + 16,258,370,325 cycles # 2.920 GHz + 45,334,605,627 instructions # 2.79 insn per cycle + 5.570118312 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.507753e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.846242e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.846242e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.499484e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.839377e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.839377e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.429783 sec +TOTAL : 2.417592 sec INFO: No Floating Point Exceptions have been reported - 7,122,810,590 cycles # 2.924 GHz - 17,820,548,090 instructions # 2.50 insn per cycle - 2.437485624 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) + 7,096,627,782 cycles # 2.930 GHz + 17,791,937,206 instructions # 2.51 insn per cycle + 2.423399622 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.248249e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.390772e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.390772e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.172278e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.285532e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.285532e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.373977 sec +TOTAL : 1.370207 sec INFO: No Floating Point Exceptions have been reported - 3,800,696,374 cycles # 2.752 GHz - 8,311,474,483 instructions # 2.19 insn per cycle - 1.381547073 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) + 3,756,028,310 cycles # 2.731 GHz + 8,261,729,651 instructions # 2.20 insn per cycle + 1.376013238 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.515740e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.758099e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.758099e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.782108e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.008509e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.008509e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.334783 sec +TOTAL : 1.281162 sec INFO: No Floating Point Exceptions have been reported - 3,623,427,363 cycles # 2.701 GHz - 7,966,643,374 instructions # 2.20 insn per cycle - 1.342318524 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) + 3,565,563,130 cycles # 2.772 GHz + 7,911,366,462 instructions # 2.22 insn per cycle + 1.287010007 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.461308e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.125295e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.125295e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.473993e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.131270e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.131270e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.726295 sec +TOTAL : 1.706601 sec INFO: No Floating Point Exceptions have been reported - 3,316,188,727 cycles # 1.914 GHz - 6,146,469,798 instructions # 1.85 insn per cycle - 1.733694711 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) + 3,274,416,803 cycles # 1.913 GHz + 6,096,966,978 instructions # 1.86 insn per cycle + 1.712593107 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index a7d3a3bcad..8649c65a6a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,60 +11,60 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:53:15 +DATE: 2024-09-18_13:00:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.603732e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.769962e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.882903e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.491816e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.706264e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.829121e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.632677 sec +TOTAL : 0.638763 sec INFO: No Floating Point Exceptions have been reported - 2,459,588,784 cycles # 2.873 GHz - 3,830,411,115 instructions # 1.56 insn per cycle - 0.912203024 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst + 2,486,309,752 cycles # 2.846 GHz + 3,832,853,586 instructions # 1.54 insn per cycle + 0.933212094 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -72,33 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.920423e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.975843e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.975843e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.926255e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.980755e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.980755e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.564298 sec +TOTAL : 5.530359 sec INFO: No Floating Point Exceptions have been reported - 16,322,815,770 cycles # 2.930 GHz - 45,379,862,622 instructions # 2.78 insn per cycle - 5.571986399 seconds time elapsed + 16,260,744,493 cycles # 2.938 GHz + 45,331,881,354 instructions # 2.79 insn per cycle + 5.536242796 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -106,31 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.531426e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.870727e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.870727e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.514119e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.847574e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.847574e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.417527 sec +TOTAL : 2.409823 sec INFO: No Floating Point Exceptions have been reported - 7,119,927,533 cycles # 2.937 GHz - 17,819,194,741 instructions # 2.50 insn per cycle - 2.425172051 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) + 7,091,224,967 cycles # 2.937 GHz + 17,790,807,442 instructions # 2.51 insn per cycle + 2.415653910 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -138,31 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.302342e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.447707e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 9.447707e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.315319e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.466327e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.466327e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.366211 sec +TOTAL : 1.347295 sec INFO: No Floating Point Exceptions have been reported - 3,809,953,710 cycles # 2.774 GHz - 8,311,255,796 instructions # 2.18 insn per cycle - 1.374030854 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) + 3,748,135,716 cycles # 2.771 GHz + 8,261,548,625 instructions # 2.20 insn per cycle + 1.353086220 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -170,31 +170,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.793036e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.010397e+06 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.010397e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.772831e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.005617e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005617e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.299199 sec +TOTAL : 1.281889 sec INFO: No Floating Point Exceptions have been reported - 3,629,527,941 cycles # 2.777 GHz - 7,964,563,950 instructions # 2.19 insn per cycle - 1.308355970 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) + 3,559,044,656 cycles # 2.766 GHz + 7,911,466,674 instructions # 2.22 insn per cycle + 1.287610992 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -202,31 +202,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.490800e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.161305e+05 ) 
sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.161305e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.412498e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.103906e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.103906e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.720915 sec +TOTAL : 1.722151 sec INFO: No Floating Point Exceptions have been reported - 3,334,705,650 cycles # 1.929 GHz - 6,144,839,228 instructions # 1.84 insn per cycle - 1.729518385 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) + 3,304,024,823 cycles # 1.914 GHz + 6,099,911,719 instructions # 1.85 insn per cycle + 1.727529111 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -234,8 +234,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 1a7a19dcfe..fbbd4d7aad 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:12:13 +DATE: 2024-09-18_12:12:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.207213e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.745203e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.857795e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.269829e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.739721e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.856627e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.487469 sec +TOTAL : 0.490633 sec INFO: No Floating Point Exceptions have been reported - 2,053,225,612 cycles # 2.869 GHz - 2,963,841,312 instructions # 1.44 insn per cycle - 0.771871582 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,054,923,204 cycles # 2.868 GHz + 2,821,409,154 instructions # 1.37 insn per cycle + 0.774891828 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.964254e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.021824e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.021824e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.972061e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.029032e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.029032e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.438732 sec +TOTAL : 5.402943 sec INFO: No Floating Point Exceptions have been reported - 16,004,474,289 cycles # 2.939 GHz - 44,480,990,455 instructions # 2.78 insn per cycle - 5.447104045 seconds time elapsed + 15,938,200,378 cycles # 2.947 GHz + 44,441,419,092 instructions # 2.79 insn per cycle + 5.408620560 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.270854e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.739248e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.739248e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.316687e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.790003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.790003e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.093047 sec +TOTAL : 2.058425 sec INFO: No Floating Point Exceptions have been reported - 6,146,902,561 cycles # 2.927 GHz - 17,124,330,277 instructions # 2.79 insn per cycle - 2.101358753 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2864) (avx2: 0) (512y: 0) (512z: 0) + 6,073,730,384 cycles # 2.944 GHz + 17,080,831,031 instructions # 2.81 insn per cycle + 2.063919735 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2863) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.010634e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.590643e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.590643e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.040290e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.607212e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.607212e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.845256 sec +TOTAL : 1.821129 sec INFO: No Floating Point Exceptions have been reported - 5,092,053,198 cycles # 2.749 GHz - 10,266,716,383 instructions # 2.02 insn per cycle - 1.853336006 seconds time elapsed + 5,028,060,974 cycles # 2.754 GHz + 10,226,327,467 instructions # 2.03 insn per cycle + 1.826739648 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3907) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.078233e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.679535e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.679535e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.109926e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.690770e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.690770e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.828978 sec +TOTAL : 1.802334 sec INFO: No Floating Point Exceptions have been reported - 5,049,904,876 cycles # 2.749 GHz - 10,046,122,437 instructions # 1.99 insn per cycle - 1.837563375 seconds time elapsed + 4,967,999,007 cycles # 2.749 GHz + 9,996,248,012 instructions # 2.01 insn per cycle + 1.807786513 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3806) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.649805e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.982891e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.982891e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.589191e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.908384e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.908384e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.359308 sec +TOTAL : 2.373161 sec INFO: No Floating Point Exceptions have been reported - 4,441,902,341 cycles # 1.877 GHz - 8,494,262,942 instructions # 1.91 insn per cycle - 2.367884804 seconds time elapsed + 4,379,373,712 cycles # 1.842 GHz + 8,445,292,719 instructions # 1.93 insn per cycle + 2.379096717 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2746) (512y: 4) (512z: 2754) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 7bcb20b104..b94de9fae6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:36:18 +DATE: 2024-09-18_12:44:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.281805e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.729157e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.845967e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.109159e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.754036e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.870271e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.487987 sec +TOTAL : 0.491488 sec INFO: No Floating Point Exceptions have been reported - 2,055,281,728 cycles # 2.872 GHz - 2,956,266,509 instructions # 1.44 insn per cycle - 0.772297546 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 + 2,077,663,912 cycles # 2.873 GHz + 2,918,599,943 instructions # 1.40 insn per cycle + 0.780254295 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.496211e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.588163e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.588163e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.511694e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.603843e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.603843e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.287732 sec +TOTAL : 4.261367 sec INFO: No Floating Point Exceptions have been reported - 12,584,199,997 cycles # 2.932 GHz - 34,606,962,286 instructions # 2.75 insn per cycle - 4.293417398 seconds time elapsed + 12,578,636,437 cycles # 2.949 GHz + 34,608,642,396 instructions # 2.75 insn per cycle + 4.266948834 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199094356969 Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.317872e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.783743e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.783743e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.245729e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.707902e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.707902e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.058410 sec +TOTAL : 2.085904 sec INFO: No Floating Point Exceptions have been reported - 6,058,288,486 cycles # 2.936 GHz - 14,847,536,122 instructions # 2.45 insn per cycle - 2.064093895 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2980) (avx2: 0) (512y: 0) (512z: 0) + 6,139,006,311 cycles # 2.936 GHz + 14,814,345,795 instructions # 2.41 insn per cycle + 2.091585873 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2975) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193414453417 -Relative difference = 1.6829758681196702e-07 +Avg ME (F77/C++) = 2.0288193755550310 +Relative difference = 1.8511017053446366e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 
7.131335e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.950373e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.950373e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.217326e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.053698e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.053698e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.556955 sec +TOTAL : 1.539150 sec INFO: No Floating Point Exceptions have been reported - 4,316,973,163 cycles # 2.764 GHz - 9,053,302,579 instructions # 2.10 insn per cycle - 1.562583378 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4460) (512y: 0) (512z: 0) + 4,266,849,527 cycles # 2.764 GHz + 9,068,527,132 instructions # 2.13 insn per cycle + 1.544604329 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4456) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181974319741 -Relative difference = 9.731379272303266e-08 +Avg ME (F77/C++) = 2.0288182069780305 +Relative difference = 1.0201902325125583e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 
7.308917e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.178890e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.178890e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.341390e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.190395e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.190395e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.521257 sec +TOTAL : 1.513835 sec INFO: No Floating Point Exceptions have been reported - 4,205,210,775 cycles # 2.756 GHz - 8,662,511,141 instructions # 2.06 insn per cycle - 1.526851661 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4225) (512y: 0) (512z: 0) + 4,209,677,652 cycles # 2.772 GHz + 8,658,962,407 instructions # 2.06 insn per cycle + 1.519314933 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4233) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181974319741 -Relative difference = 9.731379272303266e-08 +Avg ME (F77/C++) = 2.0288182069780305 +Relative difference = 1.0201902325125583e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 
5.372555e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.809988e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.809988e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.363197e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.802509e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.802509e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.038768 sec +TOTAL : 2.041800 sec INFO: No Floating Point Exceptions have been reported - 3,837,026,814 cycles # 1.878 GHz - 7,805,330,859 instructions # 2.03 insn per cycle - 2.044464874 seconds time elapsed + 3,848,539,052 cycles # 1.880 GHz + 7,805,686,420 instructions # 2.03 insn per cycle + 2.047559874 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4273) (512y: 0) (512z: 2558) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183246739209 Relative difference = 1.6003107281264138e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index 69afb6ef9f..647db6d470 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:36:38 +DATE: 2024-09-18_12:45:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.211144e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.722563e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.857000e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.181121e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.754734e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.875454e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.487617 sec +TOTAL : 0.493098 sec INFO: No Floating Point Exceptions have been reported - 2,060,698,152 cycles # 2.879 GHz - 2,912,783,409 instructions # 1.41 insn per cycle - 0.772686787 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 + 2,068,560,161 cycles # 2.866 GHz + 2,913,404,401 instructions # 1.41 insn per cycle + 0.778994585 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.666171e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.771083e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.771083e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.673571e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.779018e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.779018e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.019199 sec +TOTAL : 4.007457 sec INFO: No Floating Point Exceptions have been reported - 11,827,953,010 cycles # 2.940 GHz - 35,076,444,454 instructions # 2.97 insn per cycle - 4.024833106 seconds time elapsed + 11,821,622,506 cycles # 2.947 GHz + 35,077,213,703 instructions # 2.97 insn per cycle + 4.012923546 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199094356969 Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.415928e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.902704e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.902704e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.446071e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.947640e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.947640e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.023749 sec +TOTAL : 2.011399 sec INFO: No Floating Point Exceptions have been reported - 5,953,573,213 cycles # 2.935 GHz - 14,468,346,196 instructions # 2.43 insn per cycle - 2.029398775 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2559) (avx2: 0) (512y: 0) (512z: 0) + 5,918,531,500 cycles # 2.935 GHz + 14,532,054,201 instructions # 2.46 insn per cycle + 2.017166521 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2569) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193583255634 Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.382613e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.271116e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 8.271116e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.388337e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.293979e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.293979e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.507601 sec +TOTAL : 1.508049 sec INFO: No Floating Point Exceptions have been reported - 4,170,609,924 cycles # 2.758 GHz - 8,881,070,721 instructions # 2.13 insn per cycle - 1.513291878 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3570) (512y: 0) (512z: 0) + 4,192,067,529 cycles # 2.771 GHz + 8,850,538,175 instructions # 2.11 insn per cycle + 1.513555792 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3552) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182104704902 -Relative difference = 1.0374044905426431e-07 +Avg ME (F77/C++) = 2.0288182107033208 +Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 
7.441367e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.356283e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.356283e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.539896e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.448863e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.448863e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.495607 sec +TOTAL : 1.476824 sec INFO: No Floating Point Exceptions have been reported - 4,129,531,699 cycles # 2.752 GHz - 8,406,651,679 instructions # 2.04 insn per cycle - 1.501280641 seconds time elapsed + 4,124,218,335 cycles # 2.783 GHz + 8,408,510,612 instructions # 2.04 insn per cycle + 1.482399691 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3296) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182104704902 -Relative difference = 1.0374044905426431e-07 +Avg ME (F77/C++) = 2.0288182107033208 +Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 
5.439343e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.891063e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.891063e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.510377e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.974414e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.974414e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.014863 sec +TOTAL : 1.989253 sec INFO: No Floating Point Exceptions have been reported - 3,794,076,081 cycles # 1.879 GHz - 7,699,347,303 instructions # 2.03 insn per cycle - 2.020593600 seconds time elapsed + 3,785,582,278 cycles # 1.899 GHz + 7,698,584,647 instructions # 2.03 insn per cycle + 1.994773359 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 2110) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183204829693 Relative difference = 1.5796536184903122e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 5fcfefd8b1..ac99bf7b60 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:12:35 +DATE: 2024-09-18_12:13:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.360888e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.282446e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.948861e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.415407e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.358342e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002564e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.535947 sec +TOTAL : 0.531223 sec INFO: No Floating Point Exceptions have been reported - 2,206,968,400 cycles # 2.863 GHz - 3,177,366,447 instructions # 1.44 insn per cycle - 0.829346124 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,212,715,399 cycles # 2.883 GHz + 3,174,354,481 instructions # 1.43 insn per cycle + 0.824625337 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063423243874 Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.801369e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.847904e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.847904e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.812224e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.858502e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.858502e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.967015 sec +TOTAL : 5.893700 sec INFO: No Floating Point Exceptions have been reported - 17,524,709,788 cycles # 2.932 GHz - 46,191,860,900 instructions # 2.64 insn per cycle - 5.978935443 seconds time elapsed + 17,384,515,155 cycles # 2.947 GHz + 46,085,827,160 instructions # 2.65 insn per cycle + 5.899425018 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.140467e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.299172e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.299172e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.230178e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.393068e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.393068e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.487825 sec +TOTAL : 3.353996 sec INFO: No Floating Point Exceptions have been reported - 10,261,674,067 cycles # 2.934 GHz - 27,722,537,189 instructions # 2.70 insn per cycle - 3.498978005 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) + 9,906,776,741 cycles # 2.949 GHz + 27,581,204,322 instructions # 2.78 insn per cycle + 3.359750594 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.032137e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.431710e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.431710e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.070803e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.467527e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.467527e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.231910 sec +TOTAL : 2.175848 sec INFO: No Floating Point Exceptions have been reported - 6,175,976,175 cycles # 2.753 GHz - 12,601,670,185 instructions # 2.04 insn per cycle - 2.244594220 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2774) (512y: 0) (512z: 0) + 6,033,401,789 cycles # 2.767 GHz + 12,481,778,172 instructions # 2.07 insn per cycle + 2.181604261 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2773) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.534616e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.015525e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.015525e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.576261e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.054850e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.054850e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.042072 sec +TOTAL : 1.987931 sec INFO: No Floating Point Exceptions have been reported - 5,669,805,165 cycles # 2.764 GHz - 12,036,562,183 instructions # 2.12 insn per cycle - 2.054181107 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2519) (512y: 146) (512z: 0) + 5,526,359,959 cycles # 2.773 GHz + 11,919,157,674 instructions # 2.16 insn per cycle + 1.993761374 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2518) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.587962e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.783361e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.783361e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.583667e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.773486e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.773486e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.070999 sec +TOTAL : 3.034227 sec INFO: No Floating Point Exceptions have been reported - 5,754,052,644 cycles # 1.867 GHz - 8,225,264,257 instructions # 1.43 insn per cycle - 3.082911381 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1863) + 5,618,120,727 cycles # 1.849 GHz + 8,105,692,593 instructions # 1.44 insn per cycle + 3.040009315 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1862) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index f3ccad1744..d60a3db604 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_11:13:00 +DATE: 2024-09-18_12:13:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.403841e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.350229e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.960040e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.391860e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.272095e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.937370e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.532145 sec +TOTAL : 0.534370 sec INFO: No Floating Point Exceptions have been reported - 2,202,225,096 cycles # 2.860 GHz - 3,140,327,784 instructions # 1.43 insn per cycle - 0.826897706 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,212,045,639 cycles # 2.882 GHz + 3,154,512,029 instructions # 1.43 insn per cycle + 0.826500836 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063423243874 Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.849779e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.898891e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.898891e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.857330e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.905433e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.905433e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.812131 sec +TOTAL : 5.752139 sec INFO: No Floating Point Exceptions have been reported - 17,080,273,912 cycles # 2.934 GHz - 45,215,696,703 instructions # 2.65 insn per cycle - 5.823642020 seconds time elapsed + 16,956,103,485 cycles # 2.946 GHz + 45,111,671,387 instructions # 2.66 insn per cycle + 5.757950281 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.353337e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.532203e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.532203e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.369201e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.545470e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.545470e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.272970 sec +TOTAL : 3.220654 sec INFO: No Floating Point Exceptions have been reported - 9,622,203,648 cycles # 2.930 GHz - 26,352,115,115 instructions # 2.74 insn per cycle - 3.284795843 seconds time elapsed + 9,518,675,134 cycles # 2.951 GHz + 26,252,301,051 instructions # 2.76 insn per cycle + 3.226704286 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.499939e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.814164e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.814164e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.516544e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.830416e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.830416e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.448542 sec +TOTAL : 2.429009 sec INFO: No Floating Point Exceptions have been reported - 6,760,703,277 cycles # 2.754 GHz - 14,051,302,777 instructions # 2.08 insn per cycle - 2.455916079 seconds time elapsed + 6,737,120,781 cycles # 2.769 GHz + 14,029,549,404 instructions # 2.08 insn per cycle + 2.434732608 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2896) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.725371e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.062901e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.062901e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.763106e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.113046e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.113046e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.336085 sec +TOTAL : 2.308488 sec INFO: No Floating Point Exceptions have been reported - 6,433,871,158 cycles # 2.746 GHz - 13,544,684,713 instructions # 2.11 insn per cycle - 2.343643276 seconds time elapsed + 6,400,709,122 cycles # 2.767 GHz + 13,521,645,446 instructions # 2.11 insn per cycle + 2.314138282 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2535) (512y: 302) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.546154e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.730746e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.730746e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.631126e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.827064e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.827064e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.074398 sec +TOTAL : 2.996339 sec INFO: No Floating Point Exceptions have been reported - 5,684,367,008 cycles # 1.845 GHz - 9,231,965,840 instructions # 1.62 insn per cycle - 3.081687192 seconds time elapsed + 5,581,413,243 cycles # 1.860 GHz + 9,205,937,992 instructions # 1.65 insn per cycle + 3.002095742 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2060) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 0fe4cfc922..de5eca26a8 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_11:13:25 +DATE: 2024-09-18_12:13:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.646703e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.903323e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.008440e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.672201e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.887935e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.992853e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.472515 sec +TOTAL : 0.477007 sec INFO: No Floating Point Exceptions have been reported - 1,976,395,629 cycles # 2.864 GHz - 2,853,369,004 instructions # 1.44 insn per cycle - 0.746437756 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 + 1,987,624,447 cycles # 2.874 GHz + 2,861,967,134 instructions # 1.44 insn per cycle + 0.751704376 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.044065e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.229313e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.240372e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.044656e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.231568e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.242034e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.613454 sec +TOTAL : 0.614374 sec INFO: No Floating Point Exceptions have been reported - 2,457,306,952 cycles # 2.873 GHz - 3,760,458,763 instructions # 1.53 insn per cycle - 0.914581816 seconds time elapsed + 2,464,089,898 cycles # 2.883 GHz + 3,693,413,015 instructions # 1.50 insn per cycle + 0.914175309 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213684418649 Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.423163e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.435086e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.435086e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.435389e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.447579e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.447579e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.782254 sec +TOTAL : 6.748690 sec INFO: No Floating Point Exceptions have been reported - 19,933,068,888 cycles # 2.938 GHz - 59,910,639,029 instructions # 3.01 insn per cycle - 6.786428407 seconds time elapsed + 19,905,580,584 cycles # 2.948 GHz + 59,914,464,179 instructions # 3.01 insn per cycle + 6.753011110 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 4.574256e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.615519e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.615519e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.605126e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.648126e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.648126e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.601510 sec +TOTAL : 3.577729 sec INFO: No Floating Point Exceptions have been reported - 10,564,249,920 cycles # 2.931 GHz - 31,083,049,027 instructions # 2.94 insn per cycle - 3.605720194 seconds time elapsed + 10,567,541,735 cycles # 2.951 GHz + 31,084,954,146 instructions # 2.94 insn per cycle + 3.582009862 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 9.109445e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.271688e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.271688e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.119843e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.286275e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.286275e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.818340 sec +TOTAL : 1.816628 sec INFO: No Floating Point Exceptions have been reported - 4,995,758,651 cycles # 2.742 GHz - 11,404,411,821 instructions # 2.28 insn per cycle - 1.822613950 seconds time elapsed + 5,009,875,098 cycles # 2.752 GHz + 11,404,863,740 instructions # 2.28 insn per cycle + 1.820981146 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.031962e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.052518e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.052518e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.027376e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.048667e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.048667e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.607425 sec +TOTAL : 1.614828 sec INFO: No Floating Point Exceptions have been reported - 4,440,179,427 cycles # 2.756 GHz - 10,663,032,994 instructions # 2.40 insn per cycle - 1.611644858 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 91) (512z: 0) + 4,447,516,452 cycles # 2.748 GHz + 10,663,621,215 instructions # 2.40 insn per cycle + 1.619180273 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 92) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 7.095641e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.193407e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.193407e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.153517e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.257338e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.257338e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.331432 sec +TOTAL : 2.312792 sec INFO: No Floating Point Exceptions have been reported - 4,128,663,715 cycles # 1.768 GHz - 5,965,561,050 instructions # 1.44 insn per cycle - 2.335809030 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1617) (512y: 95) (512z: 3577) + 4,128,948,366 cycles # 1.783 GHz + 5,970,641,302 instructions # 1.45 insn per cycle + 2.317202499 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1620) (512y: 94) (512z: 3577) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 555f99fae8..9c43264546 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_11:47:00 +DATE: 2024-09-18_12:53:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.462205e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.092748e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.092748e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.507916e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.178599e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.178599e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.504368 sec +TOTAL : 0.502648 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,053,925,551 cycles # 2.862 GHz - 3,120,835,610 instructions # 1.52 insn per cycle - 0.775288198 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge + 2,068,761,834 cycles # 2.877 GHz + 3,090,755,102 instructions # 1.49 insn per cycle + 0.775689457 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -71,7 +71,7 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -80,18 +80,18 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.695563e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.383097e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.383097e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.673734e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.373672e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.373672e+06 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.834031 sec +TOTAL : 0.833395 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,122,984,951 cycles # 2.884 GHz - 5,028,895,726 instructions # 1.61 insn per cycle - 1.144542739 seconds time elapsed + 3,144,886,808 cycles # 2.895 GHz + 5,022,532,373 instructions # 1.60 insn per cycle + 1.144806482 seconds time elapsed 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -99,35 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213684418649 Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.420974e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.433351e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.433351e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.430493e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.443016e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.443016e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.797187 sec +TOTAL : 6.770345 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 19,924,578,475 cycles # 2.930 GHz - 59,919,807,490 instructions # 3.01 insn per cycle - 6.801426045 seconds time elapsed + 19,935,799,744 cycles # 2.943 GHz + 59,921,717,219 instructions # 3.01 insn per cycle + 6.775096176 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe 
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -135,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.519993e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.562146e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.562146e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.571029e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.615207e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.615207e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.653302 sec +TOTAL : 3.613337 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 10,746,732,815 cycles # 2.939 GHz - 31,134,499,346 instructions # 2.90 insn per cycle - 3.657616586 seconds time elapsed + 10,624,808,815 cycles # 2.938 GHz + 31,136,068,452 instructions # 2.93 insn per cycle + 3.618153867 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -169,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.065515e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.233179e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.233179e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.976002e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.144750e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.144750e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.835810 sec +TOTAL : 1.854528 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 5,036,150,788 cycles # 2.739 GHz - 11,457,434,104 instructions # 2.28 insn per cycle - 1.839969686 seconds time elapsed + 5,122,960,994 cycles # 2.757 GHz + 11,456,752,385 instructions # 2.24 insn per cycle + 1.859209871 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -203,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.012086e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.033024e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.033024e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.023623e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.045107e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.045107e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.647583 sec +TOTAL : 1.629549 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,499,476,819 cycles # 2.725 GHz - 10,716,818,624 instructions # 2.38 insn per cycle - 1.651828196 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 91) (512z: 0) + 4,493,284,400 cycles # 2.751 GHz + 10,714,819,935 instructions # 2.38 insn per cycle + 1.634203375 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 92) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -237,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.046506e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.146814e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.146814e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.121040e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.229108e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.229108e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.355729 sec +TOTAL : 2.332216 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,171,753,505 cycles # 1.769 GHz - 6,006,835,350 instructions # 1.44 insn per cycle - 2.359914843 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1617) (512y: 95) (512z: 3577) + 4,174,771,858 cycles # 1.787 GHz + 6,010,349,590 instructions # 1.44 insn per cycle + 2.336931936 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1620) (512y: 94) (512z: 3577) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -271,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index df418c0c55..8cdcf50b56 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_11:13:51 +DATE: 2024-09-18_12:14:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.819441e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.940165e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.036270e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.625266e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.900146e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.003656e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.473171 sec +TOTAL : 0.475988 sec INFO: No Floating Point Exceptions have been reported - 1,973,905,564 cycles # 2.864 GHz - 2,835,389,936 instructions # 1.44 insn per cycle - 0.747859769 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 + 1,977,726,110 cycles # 2.850 GHz + 2,827,901,574 instructions # 1.43 insn per cycle + 0.751347521 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.045923e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.239053e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.249723e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.046576e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.234116e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.244538e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.613723 sec +TOTAL : 0.611168 sec INFO: No Floating Point Exceptions have been reported - 2,468,512,324 cycles # 2.879 GHz - 3,722,507,305 instructions # 1.51 insn per cycle - 0.915283019 seconds time elapsed + 2,454,142,578 cycles # 2.886 GHz + 3,695,001,143 instructions # 1.51 insn per cycle + 0.909771724 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213684418649 Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.416240e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.428350e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.428350e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.436838e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.448877e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.448877e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.802073 sec +TOTAL : 6.744113 sec INFO: No Floating Point Exceptions have been reported - 19,919,234,926 cycles # 2.929 GHz - 60,126,857,831 instructions # 3.02 insn per cycle - 6.806341598 seconds time elapsed + 19,898,434,725 cycles # 2.949 GHz + 60,128,447,647 instructions # 3.02 insn per cycle + 6.748351399 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1322) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 4.628115e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.671006e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.671006e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.649169e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.692956e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.692956e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.560395 sec +TOTAL : 3.544209 sec INFO: No Floating Point Exceptions have been reported - 10,470,027,689 cycles # 2.938 GHz - 30,685,175,745 instructions # 2.93 insn per cycle - 3.564357324 seconds time elapsed + 10,481,283,758 cycles # 2.954 GHz + 30,686,827,574 instructions # 2.93 insn per cycle + 3.548515404 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5047) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 8.858005e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.013532e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.013532e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.897572e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.058943e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.058943e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.870006 sec +TOTAL : 1.861619 sec INFO: No Floating Point Exceptions have been reported - 5,129,037,452 cycles # 2.738 GHz - 11,838,972,708 instructions # 2.31 insn per cycle - 1.873874088 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4748) (512y: 0) (512z: 0) + 5,141,047,361 cycles # 2.756 GHz + 11,838,355,420 instructions # 2.30 insn per cycle + 1.866119668 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4746) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 9.652883e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.834044e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.834044e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.640218e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.828831e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.828831e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.717936 sec +TOTAL : 1.719787 sec INFO: No Floating Point Exceptions have been reported - 4,726,163,144 cycles # 2.747 GHz - 11,165,051,323 instructions # 2.36 insn per cycle - 1.721718897 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4403) (512y: 245) (512z: 0) + 4,732,734,719 cycles # 2.746 GHz + 11,163,471,114 instructions # 2.36 insn per cycle + 1.724312193 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4403) (512y: 246) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 7.029308e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.126499e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.126499e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.072241e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.175446e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.175446e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.353551 sec +TOTAL : 2.339175 sec INFO: No Floating Point Exceptions have been reported - 4,165,348,623 cycles # 1.768 GHz - 6,220,012,480 instructions # 1.49 insn per cycle - 2.357450464 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1513) (512y: 140) (512z: 3679) + 4,159,319,454 cycles # 1.776 GHz + 6,222,343,045 instructions # 1.50 insn per cycle + 2.343565013 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1516) (512y: 139) (512z: 3679) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 0344b19ae4..b9aad18eeb 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_11:14:17 +DATE: 2024-09-18_12:14:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.690436e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.002102e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.037933e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.682161e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.012912e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.052707e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.455498 sec +TOTAL : 0.457449 sec INFO: No Floating Point Exceptions have been reported - 1,928,242,310 cycles # 2.870 GHz - 2,746,045,826 instructions # 1.42 insn per cycle - 0.728814382 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 + 1,934,954,114 cycles # 2.865 GHz + 2,736,882,841 instructions # 1.41 insn per cycle + 0.732650423 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.680469e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.378510e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.424836e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.683155e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.385425e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.426136e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 -TOTAL : 0.509889 sec +TOTAL : 0.507459 sec INFO: No Floating Point Exceptions have been reported - 2,121,031,452 cycles # 2.862 GHz - 3,036,959,694 instructions # 1.43 insn per cycle - 0.800065199 seconds time elapsed + 2,120,407,613 cycles # 2.883 GHz + 3,024,448,335 instructions # 1.43 insn per cycle + 0.792985016 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.412607e+00 Avg ME (F77/GPU) = 1.4132214305330990 Relative difference = 0.0004349621183379836 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.504687e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.517708e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.517708e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.506015e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.518972e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.518972e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.560247 sec +TOTAL : 6.556943 sec INFO: No Floating Point Exceptions have been reported - 19,257,464,373 cycles # 2.934 GHz - 59,612,594,917 instructions # 3.10 insn per cycle - 6.564375492 seconds time elapsed + 19,264,218,294 cycles # 2.937 GHz + 59,614,798,383 instructions # 3.09 insn per cycle + 6.560956742 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129949096991936 Relative difference = 6.390737857384068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 8.084277e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.218488e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.218488e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.070356e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.207853e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.207853e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 2.043801 sec +TOTAL : 2.048077 sec INFO: No Floating Point Exceptions have been reported - 6,009,096,977 cycles # 2.936 GHz - 17,060,655,087 instructions # 2.84 insn per cycle - 2.047534449 seconds time elapsed + 6,023,874,049 cycles # 2.936 GHz + 17,061,893,848 instructions # 2.83 insn per cycle + 2.052246672 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129954647353316 Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.741509e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.801920e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.801920e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.743575e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.804848e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.804848e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.958794 sec +TOTAL : 0.957876 sec INFO: No Floating Point Exceptions have been reported - 2,632,796,186 cycles # 2.737 GHz - 6,187,347,650 instructions # 2.35 insn per cycle - 0.962496439 seconds time elapsed + 2,640,887,772 cycles # 2.747 GHz + 6,187,336,173 instructions # 2.34 insn per cycle + 0.962119669 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.912269e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.986419e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.986419e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.915124e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.989470e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989470e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.875039 sec +TOTAL : 0.873685 sec INFO: No Floating Point Exceptions have been reported - 2,407,469,182 cycles # 2.742 GHz - 5,790,784,602 instructions # 2.41 insn per cycle - 0.878768885 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4908) (512y: 36) (512z: 0) + 2,402,820,009 cycles # 2.739 GHz + 5,790,162,566 instructions # 2.41 insn per cycle + 0.877828237 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4906) (512y: 37) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.443174e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.485583e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.485583e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.453255e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.496895e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.496895e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.155478 sec +TOTAL : 1.147590 sec INFO: No Floating Point Exceptions have been reported - 2,073,615,836 cycles # 1.790 GHz - 3,391,178,624 instructions # 1.64 insn per cycle - 1.159306518 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2235) (512y: 39) (512z: 3789) + 2,076,037,431 cycles # 1.804 GHz + 3,391,394,333 instructions # 1.63 insn per cycle + 1.151886126 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 37) (512z: 3789) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 93fdf05be3..1d937591ab 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_11:47:26 +DATE: 2024-09-18_12:54:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.480682e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.545762e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.545762e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.452792e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.504415e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.504415e+07 ) sec^-1 MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.469335 sec +TOTAL : 0.473960 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,971,009,268 cycles # 2.861 GHz - 2,878,621,667 instructions # 1.46 insn per cycle - 0.747088963 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge + 1,972,315,192 cycles # 2.868 GHz + 2,911,549,142 instructions # 1.48 insn per cycle + 0.746422585 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -71,7 +71,7 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -80,18 +80,18 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.503732e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.296845e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.296845e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.537799e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.260766e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.260766e+07 ) sec^-1 MeanMatrixElemValue = ( 6.737499e+02 +- 4.776369e+02 ) GeV^-2 -TOTAL : 0.653804 sec +TOTAL : 0.654007 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,546,264,462 cycles # 2.875 GHz - 3,884,000,523 instructions # 1.53 insn per cycle - 0.944357505 seconds time elapsed + 2,561,684,908 cycles # 2.881 GHz + 3,893,804,940 instructions # 1.52 insn per cycle + 0.947747663 seconds time elapsed 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -99,35 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.412607e+00 Avg ME (F77/GPU) = 1.4132214305330990 Relative difference = 0.0004349621183379836 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.503119e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.516211e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.516211e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.511557e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.524668e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.524668e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.568401 sec +TOTAL : 6.546653 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 19,288,292,034 cycles # 2.936 GHz - 59,615,397,281 instructions # 3.09 insn per cycle - 6.572330246 seconds time elapsed + 19,271,491,646 cycles # 2.942 GHz + 59,619,016,957 instructions # 3.09 insn per cycle + 6.550964309 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe 
+runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -135,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129949096991936 Relative difference = 6.390737857384068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.074244e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.211619e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.211619e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.075407e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.222998e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.222998e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 2.051717 sec +TOTAL : 2.052234 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 6,032,564,255 cycles # 2.936 GHz - 17,108,905,426 instructions # 2.84 insn per cycle - 2.055577630 seconds time elapsed + 6,045,406,228 cycles # 2.940 GHz + 17,110,194,161 instructions # 2.83 insn per cycle + 2.056632379 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -169,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129954647353316 Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.735664e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.796597e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.796597e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.741961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.804731e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.804731e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.965943 sec +TOTAL : 0.963728 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,652,269,080 cycles # 2.742 GHz - 6,224,274,753 instructions # 2.35 insn per cycle - 0.969710233 seconds time elapsed + 2,665,943,840 cycles # 2.756 GHz + 6,224,556,067 instructions # 2.33 insn per cycle + 0.968076233 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -203,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.901208e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.974602e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.974602e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.889422e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.966151e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.966151e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.883566 sec +TOTAL : 0.890681 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,424,412,243 cycles # 2.734 GHz - 5,827,930,388 instructions # 2.40 insn per cycle - 0.887425140 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4908) (512y: 36) (512z: 0) + 2,436,025,235 cycles # 2.723 GHz + 5,827,123,635 instructions # 2.39 insn per cycle + 0.895318545 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4906) (512y: 37) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -237,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.438672e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.481527e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.481527e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.443092e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.487537e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.487537e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.163548 sec +TOTAL : 1.160657 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,093,109,210 cycles # 1.794 GHz - 3,432,132,802 instructions # 1.64 insn per cycle - 1.167531871 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2235) (512y: 39) (512z: 3789) + 2,101,025,117 cycles # 1.805 GHz + 3,433,428,500 instructions # 1.63 insn per cycle + 1.165027687 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 37) (512z: 3789) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -271,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 04fc107fbb..4251937b55 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_11:14:38 +DATE: 2024-09-18_12:15:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.693098e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.040838e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.074373e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.677136e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.031870e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.066369e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.456865 sec +TOTAL : 0.462735 sec INFO: No Floating Point Exceptions have been reported - 1,922,600,682 cycles # 2.857 GHz - 2,735,771,538 instructions # 1.42 insn per cycle - 0.729676644 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 + 1,946,975,962 cycles # 2.877 GHz + 2,736,807,999 instructions # 1.41 insn per cycle + 0.735970844 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.681790e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.366127e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.406124e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.680283e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.366147e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.409371e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 -TOTAL : 0.509422 sec +TOTAL : 0.508125 sec INFO: No Floating Point Exceptions have been reported - 2,117,565,229 cycles # 2.869 GHz - 3,056,275,302 instructions # 1.44 insn per cycle - 0.796290434 seconds time elapsed + 2,113,521,464 cycles # 2.866 GHz + 3,052,176,829 instructions # 1.44 insn per cycle + 0.794975331 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.412607e+00 Avg ME (F77/GPU) = 1.4132214305330990 Relative difference = 0.0004349621183379836 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.491972e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.504565e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.504565e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.498167e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.510888e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.510888e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.593721 sec +TOTAL : 6.577268 sec INFO: No Floating Point Exceptions have been reported - 19,401,378,848 cycles # 2.941 GHz - 59,351,233,195 instructions # 3.06 insn per cycle - 6.597810534 seconds time elapsed + 19,407,580,643 cycles # 2.949 GHz + 59,354,263,399 instructions # 3.06 insn per cycle + 6.581442326 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129949096991936 Relative difference = 6.390737857384068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 8.427450e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.574205e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.574205e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.398785e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.549497e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.549497e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.961524 sec +TOTAL : 1.968439 sec INFO: No Floating Point Exceptions have been reported - 5,763,417,063 cycles # 2.934 GHz - 16,848,552,420 instructions # 2.92 insn per cycle - 1.965663621 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5611) (avx2: 0) (512y: 0) (512z: 0) + 5,775,824,576 cycles # 2.929 GHz + 16,849,685,670 instructions # 2.92 insn per cycle + 1.972573842 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5610) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129954647353316 Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.513418e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.559668e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.559668e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.527004e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.573961e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.573961e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.100420 sec +TOTAL : 1.091009 sec INFO: No Floating Point Exceptions have been reported - 3,014,454,268 cycles # 2.733 GHz - 6,847,622,992 instructions # 2.27 insn per cycle - 1.104094178 seconds time elapsed + 3,021,095,483 cycles # 2.760 GHz + 6,848,870,145 instructions # 2.27 insn per cycle + 1.095189540 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5735) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.641246e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.695747e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.695747e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.611080e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.664155e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.664155e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.015943 sec +TOTAL : 1.035237 sec INFO: No Floating Point Exceptions have been reported - 2,793,517,683 cycles # 2.742 GHz - 6,436,907,448 instructions # 2.30 insn per cycle - 1.019630864 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5509) (512y: 22) (512z: 0) + 2,858,508,214 cycles # 2.752 GHz + 6,438,110,737 instructions # 2.25 insn per cycle + 1.039480125 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5509) (512y: 23) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.322600e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.358678e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.358678e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.329594e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.366106e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.366106e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.258576 sec +TOTAL : 1.252535 sec INFO: No Floating Point Exceptions have been reported - 2,248,626,373 cycles # 1.783 GHz - 3,754,168,834 instructions # 1.67 insn per cycle - 1.262333902 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2466) (512y: 29) (512z: 4084) + 2,255,457,879 cycles # 1.796 GHz + 3,755,585,205 instructions # 1.67 insn per cycle + 1.256791945 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 28) (512z: 4084) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 2641b6a6f8..09551986c9 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_11:14:59 +DATE: 2024-09-18_12:15:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.553984e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.813684e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.925982e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.594581e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.871606e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.970951e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.473727 sec +TOTAL : 0.473359 sec INFO: No Floating Point Exceptions have been reported - 1,964,125,217 cycles # 2.842 GHz - 2,850,802,933 instructions # 1.45 insn per cycle - 0.747533169 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 + 1,992,272,995 cycles # 2.881 GHz + 2,873,441,271 instructions # 1.44 insn per cycle + 0.748198068 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.039046e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.224514e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.235497e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.037058e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.222746e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.233058e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.623745 sec +TOTAL : 0.613274 sec INFO: No Floating Point Exceptions have been reported - 2,491,113,981 cycles # 2.884 GHz - 3,741,355,868 instructions # 1.50 insn per cycle - 0.924633822 seconds time elapsed + 2,464,972,790 cycles # 2.891 GHz + 3,748,511,486 instructions # 1.52 insn per cycle + 0.912099116 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213755569487 Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.390069e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.401784e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.401784e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.399672e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.411896e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.411896e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.876043 sec +TOTAL : 6.848810 sec INFO: No Floating Point Exceptions have been reported - 20,176,006,103 cycles # 2.934 GHz - 60,944,588,650 instructions # 3.02 insn per cycle - 6.880217907 seconds time elapsed + 20,197,037,339 cycles # 2.948 GHz + 60,947,415,438 instructions # 3.02 insn per cycle + 6.853052511 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1220) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 4.624106e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.667122e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.667122e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.642526e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.687090e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.687090e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.563355 sec +TOTAL : 3.549259 sec INFO: No Floating Point Exceptions have been reported - 10,467,283,500 cycles # 2.935 GHz - 30,820,693,493 instructions # 2.94 insn per cycle - 3.567171047 seconds time elapsed + 10,477,481,501 cycles # 2.949 GHz + 30,820,930,825 instructions # 2.94 insn per cycle + 3.553666211 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5351) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 9.172379e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.336577e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.336577e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.196444e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.370621e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.370621e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.806966 sec +TOTAL : 1.802004 sec INFO: No Floating Point Exceptions have been reported - 4,954,879,411 cycles # 2.737 GHz - 11,359,422,816 instructions # 2.29 insn per cycle - 1.810872816 seconds time elapsed + 4,965,652,288 cycles # 2.750 GHz + 11,359,248,854 instructions # 2.29 insn per cycle + 1.806342805 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.036379e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.057513e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.057513e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.041756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.063436e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.063436e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.601666 sec +TOTAL : 1.592695 sec INFO: No Floating Point Exceptions have been reported - 4,380,983,099 cycles # 2.729 GHz - 10,610,165,712 instructions # 2.42 insn per cycle - 1.605990710 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4503) (512y: 83) (512z: 0) + 4,382,366,442 cycles # 2.746 GHz + 10,608,797,295 instructions # 2.42 insn per cycle + 1.596978533 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4503) (512y: 84) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 6.900446e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.995461e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.995461e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.957560e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.055998e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.055998e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.397174 sec +TOTAL : 2.377375 sec INFO: No Floating Point Exceptions have been reported - 4,245,323,919 cycles # 1.769 GHz - 6,166,210,089 instructions # 1.45 insn per cycle - 2.401100901 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2140) (512y: 117) (512z: 3653) + 4,237,877,454 cycles # 1.780 GHz + 6,168,521,326 instructions # 1.46 insn per cycle + 2.381770690 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2143) (512y: 116) (512z: 3653) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213786174055 Relative difference = 4.3972324717191576e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 0766319c3b..e31dab3bcb 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-15_11:15:25 +DATE: 2024-09-18_12:15:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.556212e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.906743e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.026143e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.665772e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.933205e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.041936e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.472306 sec +TOTAL : 0.474243 sec INFO: No Floating Point Exceptions have been reported - 1,982,520,983 cycles # 2.874 GHz - 2,863,074,866 instructions # 1.44 insn per cycle - 0.745845869 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 + 1,991,405,654 cycles # 2.879 GHz + 2,864,466,394 instructions # 1.44 insn per cycle + 0.749070557 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.042838e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.231665e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.242155e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.042210e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.228789e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.239192e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.611235 sec +TOTAL : 0.610923 sec INFO: No Floating Point Exceptions have been reported - 2,455,829,243 cycles # 2.879 GHz - 3,741,729,771 instructions # 1.52 insn per cycle - 0.912428146 seconds time elapsed + 2,452,672,007 cycles # 2.882 GHz + 3,739,836,978 instructions # 1.52 insn per cycle + 0.910769372 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213755569487 Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.386940e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.398394e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.398394e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.379932e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.391571e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.391571e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.884140 sec +TOTAL : 6.905036 sec INFO: No Floating Point Exceptions have been reported - 20,272,568,697 cycles # 2.944 GHz - 61,168,730,148 instructions # 3.02 insn per cycle - 6.888274413 seconds time elapsed + 20,270,175,803 cycles # 2.935 GHz + 61,175,514,110 instructions # 3.02 insn per cycle + 6.909213331 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1272) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 4.669440e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.713215e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.713215e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.712701e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.757964e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.757964e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.529000 sec +TOTAL : 3.496353 sec INFO: No Floating Point Exceptions have been reported - 10,335,535,502 cycles # 2.926 GHz - 30,533,410,675 instructions # 2.95 insn per cycle - 3.532867905 seconds time elapsed + 10,330,450,764 cycles # 2.952 GHz + 30,532,965,755 instructions # 2.96 insn per cycle + 3.500721812 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5155) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 8.803371e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.957146e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.957146e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.873461e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.031366e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.031366e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.880842 sec +TOTAL : 1.866310 sec INFO: No Floating Point Exceptions have been reported - 5,141,108,977 cycles # 2.729 GHz - 11,871,626,607 instructions # 2.31 insn per cycle - 1.885060685 seconds time elapsed + 5,149,448,063 cycles # 2.754 GHz + 11,872,714,422 instructions # 2.31 insn per cycle + 1.870704205 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4887) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 9.734351e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.920231e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.920231e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.721277e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.910902e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.910902e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.703697 sec +TOTAL : 1.705742 sec INFO: No Floating Point Exceptions have been reported - 4,677,605,202 cycles # 2.740 GHz - 11,166,557,237 instructions # 2.39 insn per cycle - 1.707597039 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4508) (512y: 238) (512z: 0) + 4,682,307,882 cycles # 2.740 GHz + 11,166,992,215 instructions # 2.38 insn per cycle + 1.710031590 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4508) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 6.863155e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.956338e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.956338e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.916313e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.015099e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.015099e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.409053 sec +TOTAL : 2.391177 sec INFO: No Floating Point Exceptions have been reported - 4,255,960,621 cycles # 1.764 GHz - 6,404,237,522 instructions # 1.50 insn per cycle - 2.413297760 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2036) (512y: 163) (512z: 3731) + 4,255,173,095 cycles # 1.777 GHz + 6,409,630,981 instructions # 1.51 insn per cycle + 2.395610797 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2039) (512y: 162) (512z: 3731) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213786174055 Relative difference = 4.3972324717191576e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 5f3726dcea..e60a3b56f2 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:15:51 +DATE: 2024-09-18_12:16:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.308012e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.334511e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.336215e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.313288e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.338946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.340893e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.533591 sec +TOTAL : 0.535030 sec INFO: No Floating Point Exceptions have been reported - 2,205,805,938 cycles # 2.869 GHz - 3,444,884,387 instructions # 1.56 insn per cycle - 0.825307966 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 + 2,203,915,832 cycles # 2.863 GHz + 3,411,363,725 instructions # 1.55 insn per cycle + 0.826937803 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.131073e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.161174e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.162405e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.139082e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.168902e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.170140e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.048991 sec +TOTAL : 3.048810 sec INFO: No Floating Point Exceptions have been reported - 9,688,028,273 cycles # 2.924 GHz - 22,036,541,373 instructions # 2.27 insn per cycle - 3.369850770 seconds time elapsed + 9,673,114,822 cycles # 2.925 GHz + 22,022,328,349 instructions # 2.28 insn per cycle + 3.363974995 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.879946e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.880868e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.880868e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.884766e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.885678e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.885678e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.730075 sec +TOTAL : 8.707888 sec INFO: No Floating Point Exceptions have been reported - 25,643,153,835 cycles # 2.937 GHz - 78,954,437,611 instructions # 3.08 insn per cycle - 8.734432118 seconds time elapsed + 25,646,480,577 cycles # 2.944 GHz + 78,959,199,970 instructions # 3.08 insn per cycle + 8.712344144 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 
128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.520374e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.523613e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.523613e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.525938e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.529103e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.529103e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.665332 sec +TOTAL : 4.657640 sec INFO: No Floating Point Exceptions have been reported - 13,099,128,105 cycles # 2.806 GHz - 39,559,591,481 instructions # 3.02 insn per cycle - 4.669271517 seconds time elapsed + 13,102,337,051 cycles # 2.811 GHz + 39,559,050,978 instructions # 3.02 insn per cycle + 4.662071177 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.059011e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.075081e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.075081e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.037518e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.054750e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.054750e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.042413 sec +TOTAL : 2.047044 sec INFO: No Floating Point Exceptions have been reported - 5,610,747,752 cycles # 2.743 GHz - 13,824,504,616 instructions # 2.46 insn per cycle - 2.046398223 seconds time elapsed + 5,613,016,028 cycles # 2.737 GHz + 13,823,575,120 instructions # 2.46 insn per cycle + 2.051472192 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 
256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.162703e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.184308e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.184308e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.172996e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.194283e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.194283e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.796726 sec +TOTAL : 1.794478 sec INFO: No Floating Point Exceptions have been reported - 4,922,237,700 cycles # 2.735 GHz - 12,506,994,545 instructions # 2.54 insn per cycle - 1.800589813 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) + 4,922,583,154 cycles # 2.738 GHz + 12,506,595,932 instructions # 2.54 insn per cycle + 1.798855063 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 
512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.982168e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.994583e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.994583e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.987584e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.999990e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.999990e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.355452 sec +TOTAL : 2.353771 sec INFO: No Floating Point Exceptions have been reported - 4,140,123,386 cycles # 1.756 GHz - 6,390,153,387 instructions # 1.54 insn per cycle - 2.359734916 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) + 4,138,447,690 cycles # 1.756 GHz + 6,393,230,519 instructions # 1.54 insn per cycle + 2.358130141 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 1d93db579b..40b573a43c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:48:14 +DATE: 2024-09-18_12:55:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.969430e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.268357e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.268357e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.976623e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.275789e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.275789e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.523555 sec +TOTAL : 0.524913 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,166,841,175 cycles # 2.867 GHz - 3,453,451,458 instructions # 1.59 insn per cycle - 0.814918597 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge + 2,187,535,830 cycles # 2.870 GHz + 3,393,578,749 instructions # 1.55 insn per cycle + 0.821118226 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -71,7 +71,7 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -80,18 +80,18 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.613032e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.091578e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.091578e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.647884e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.131075e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.131075e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.317499 sec +TOTAL : 3.300824 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 10,422,723,136 cycles # 2.898 GHz - 15,879,167,379 instructions # 1.52 insn per cycle - 3.658545225 seconds time elapsed + 10,456,540,936 cycles # 2.924 GHz + 23,609,445,897 instructions # 2.26 insn per cycle + 3.631983066 seconds time elapsed 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -99,35 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.878765e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.879684e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.879684e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.884487e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.885391e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.885391e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.741583 sec +TOTAL : 8.714099 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 25,666,397,830 cycles # 2.935 GHz - 78,965,262,045 instructions # 3.08 insn per cycle - 8.745862119 seconds time elapsed + 25,667,183,320 cycles # 2.944 GHz + 78,962,641,614 instructions # 3.08 insn per cycle + 8.718743024 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -135,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.560977e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.564368e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.564368e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.526771e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.530011e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.530011e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.617447 sec +TOTAL : 4.660484 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 13,088,026,122 cycles # 2.833 GHz - 39,572,731,788 instructions # 3.02 insn per cycle - 4.621932955 seconds time elapsed + 13,111,732,509 cycles # 2.811 GHz + 39,572,349,146 instructions # 3.02 insn per cycle + 4.665178116 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -169,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.016507e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.032941e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.032941e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.088722e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.106050e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.106050e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.058059 sec +TOTAL : 2.038539 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 5,631,279,447 cycles # 2.732 GHz - 13,836,775,240 instructions # 2.46 insn per cycle - 2.062638485 seconds time elapsed + 5,622,073,957 cycles # 2.753 GHz + 13,834,285,866 instructions # 2.46 insn per cycle + 2.043264664 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -203,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.172752e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.195878e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.195878e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.180474e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.202665e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.202665e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.800276 sec +TOTAL : 1.797187 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,940,734,767 cycles # 2.740 GHz - 12,518,660,568 instructions # 2.53 insn per cycle - 1.804734715 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) + 4,937,816,969 cycles # 2.742 GHz + 12,516,988,109 instructions # 2.53 insn per cycle + 1.801993078 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -237,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.912888e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.925297e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.925297e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.979563e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.992901e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.992901e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.384873 sec +TOTAL : 2.360775 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,161,817,801 cycles # 1.743 GHz - 6,405,054,448 instructions # 1.54 insn per cycle - 2.389410885 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) + 4,161,078,836 cycles # 1.760 GHz + 6,405,054,232 instructions # 1.54 insn per cycle + 2.365459011 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -271,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index fc2e4b7aa0..14d3e456fd 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:59:16 +DATE: 2024-09-18_13:06:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.322702e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.346002e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.347615e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.295730e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.322229e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.324249e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.517480 sec +TOTAL : 0.520677 sec INFO: No Floating Point Exceptions have been reported - 2,154,192,085 cycles # 2.875 GHz - 3,384,532,263 instructions # 1.57 insn per cycle - 0.808566781 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common + 2,148,428,756 cycles # 2.846 GHz + 3,383,382,873 instructions # 1.57 insn per cycle + 0.814124974 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.137866e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.167359e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.168584e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.133518e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.163906e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.165159e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.158061 sec +TOTAL : 3.142841 sec INFO: No Floating Point Exceptions have been reported - 9,917,250,541 cycles # 2.905 GHz - 22,199,780,027 instructions # 2.24 insn per cycle - 3.469925481 seconds time elapsed + 9,913,272,493 cycles # 2.915 GHz + 21,406,834,972 instructions # 2.16 insn per cycle + 3.457413936 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.876744e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.877706e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.877706e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.883085e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.883982e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.883982e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.747062 sec +TOTAL : 8.717475 sec INFO: No Floating Point Exceptions have been reported - 25,648,612,426 cycles # 2.931 GHz - 78,952,780,288 instructions # 3.08 insn per cycle - 8.750949596 seconds time elapsed + 25,650,608,073 cycles # 2.942 GHz + 78,955,783,568 instructions # 3.08 insn per cycle + 8.721774217 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] 
('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.516490e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.519795e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.519795e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.525568e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.528763e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.528763e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.672214 sec +TOTAL : 4.659766 sec INFO: No Floating Point Exceptions have been reported - 13,064,181,413 cycles # 2.795 GHz - 39,557,975,845 instructions # 3.03 insn per cycle - 4.676162125 seconds time elapsed + 13,093,152,498 cycles # 2.808 GHz + 39,558,598,891 instructions # 3.02 insn per cycle + 4.664046020 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.035027e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.051957e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.051957e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.044580e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.060991e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.060991e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.050050 sec +TOTAL : 2.046898 sec INFO: No Floating Point Exceptions have been reported - 5,620,854,320 cycles # 2.738 GHz - 13,824,518,317 instructions # 2.46 insn per cycle - 2.054209584 seconds time elapsed + 5,615,094,940 cycles # 2.739 GHz + 13,822,846,005 instructions # 2.46 insn per cycle + 2.051140101 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.147098e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.168017e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.168017e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.166765e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.187474e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.187474e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.801905 sec +TOTAL : 1.797318 sec INFO: No Floating Point Exceptions have been reported - 4,929,552,945 cycles # 2.731 GHz - 12,503,971,607 instructions # 2.54 insn per cycle - 1.805808510 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) + 4,920,345,742 cycles # 2.732 GHz + 12,503,437,535 instructions # 2.54 insn per cycle + 1.801597465 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] 
('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.940588e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.953237e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.953237e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.991343e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.003364e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.003364e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.372033 sec +TOTAL : 2.354011 sec INFO: No Floating Point Exceptions have been reported - 4,149,036,914 cycles # 1.747 GHz - 6,390,952,192 instructions # 1.54 insn per cycle - 2.375889237 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) + 4,141,327,319 cycles # 1.757 GHz + 6,390,315,143 instructions # 1.54 insn per cycle + 2.358468154 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index 9898610236..5ee24db0d6 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:56:23 +DATE: 2024-09-18_13:03:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.313175e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.336829e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.338592e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.306466e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.330597e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.332575e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.516520 sec +TOTAL : 0.519642 sec INFO: No Floating Point Exceptions have been reported - 2,145,509,423 cycles # 2.867 GHz - 3,354,996,461 instructions # 1.56 insn per cycle - 0.808722776 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst + 2,158,072,451 cycles # 2.867 GHz + 3,417,619,447 instructions # 1.58 insn per cycle + 0.812681086 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.143109e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.171681e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.172874e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.138609e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.169214e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.170473e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.078670 sec +TOTAL : 3.082389 sec INFO: No Floating Point Exceptions have been reported - 9,691,265,584 cycles # 2.906 GHz - 21,333,506,403 instructions # 2.20 insn per cycle - 3.390410721 seconds time elapsed + 9,772,793,139 cycles # 2.928 GHz + 21,341,571,954 instructions # 2.18 insn per cycle + 3.396434043 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.881675e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.882585e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.882585e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.882405e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.883286e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.883286e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.722848 sec +TOTAL : 8.718737 sec INFO: No Floating Point Exceptions have been reported - 25,651,425,779 cycles # 2.940 GHz - 78,952,412,711 instructions # 3.08 insn per cycle - 8.726726338 seconds time elapsed + 25,644,402,521 cycles # 2.941 GHz + 78,955,748,181 instructions # 3.08 insn per cycle + 8.722884517 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] 
('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.517566e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.520849e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.520849e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.546461e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.549706e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.549706e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.669337 sec +TOTAL : 4.630610 sec INFO: No Floating Point Exceptions have been reported - 13,090,805,200 cycles # 2.802 GHz - 39,558,366,602 instructions # 3.02 insn per cycle - 4.673224740 seconds time elapsed + 13,065,342,955 cycles # 2.819 GHz + 39,558,576,157 instructions # 3.03 insn per cycle + 4.634990681 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.039895e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.055822e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.055822e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.040236e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.056583e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.056583e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.046582 sec +TOTAL : 2.046187 sec INFO: No Floating Point Exceptions have been reported - 5,609,737,694 cycles # 2.737 GHz - 13,824,245,582 instructions # 2.46 insn per cycle - 2.050512234 seconds time elapsed + 5,614,658,100 cycles # 2.739 GHz + 13,823,752,036 instructions # 2.46 insn per cycle + 2.050408597 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.131967e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.152226e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.152226e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.219242e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.241638e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.241638e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.803136 sec +TOTAL : 1.785361 sec INFO: No Floating Point Exceptions have been reported - 4,925,220,417 cycles # 2.727 GHz - 12,506,075,504 instructions # 2.54 insn per cycle - 1.807026447 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) + 4,914,777,250 cycles # 2.747 GHz + 12,505,304,491 instructions # 2.54 insn per cycle + 1.789665479 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] 
('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.955213e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.967580e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.967580e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.996219e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.008754e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.008754e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.365271 sec +TOTAL : 2.350947 sec INFO: No Floating Point Exceptions have been reported - 4,141,642,997 cycles # 1.749 GHz - 6,391,703,659 instructions # 1.54 insn per cycle - 2.369209251 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) + 4,136,746,733 cycles # 1.757 GHz + 6,392,473,320 instructions # 1.55 insn per cycle + 2.355422920 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 794fb1a802..bbefe2a8e4 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,76 +11,76 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:53:35 +DATE: 2024-09-18_13:00:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.043181e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.332865e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.334802e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.061167e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.349241e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.351100e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.523228 sec +TOTAL : 0.520345 sec INFO: No Floating Point Exceptions have been reported - 2,169,607,107 cycles # 2.877 GHz - 3,459,237,306 instructions # 1.59 insn per cycle - 0.815202021 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst + 2,166,689,738 cycles # 2.876 GHz + 3,445,065,863 instructions # 1.59 insn per cycle + 0.812996601 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.724697e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.162122e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.163349e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.727515e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.166918e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168161e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.220306 sec +TOTAL : 3.213299 sec INFO: No Floating Point Exceptions have been reported - 10,133,189,238 cycles # 2.914 GHz - 23,148,993,968 instructions # 2.28 insn per cycle - 3.534065636 seconds time elapsed + 10,170,057,078 cycles # 2.920 GHz + 23,084,015,508 instructions # 2.27 insn per cycle + 3.538645884 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -88,33 +88,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst 
OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.879192e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.880098e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.880098e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.884169e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.885066e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.885066e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.734016 sec +TOTAL : 8.710738 sec INFO: No Floating Point Exceptions have been reported - 25,661,507,280 cycles # 2.937 GHz - 78,953,590,713 instructions # 3.08 insn per cycle - 8.737772518 seconds time elapsed + 25,636,302,572 cycles # 2.942 GHz + 78,955,597,829 instructions # 3.08 insn per cycle + 8.714991120 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -122,31 +122,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] 
('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.527947e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.531121e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.531121e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.516471e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.519747e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.519747e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.656259 sec +TOTAL : 4.670214 sec INFO: No Floating Point Exceptions have been reported - 13,056,695,540 cycles # 2.803 GHz - 39,560,471,761 instructions # 3.03 insn per cycle - 4.660116201 seconds time elapsed + 13,077,998,657 cycles # 2.798 GHz + 39,560,581,640 instructions # 3.02 insn per cycle + 4.674479243 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -154,31 +154,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.991321e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.007389e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.007389e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.487681e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.501903e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.501903e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.059547 sec +TOTAL : 2.196522 sec INFO: No Floating Point Exceptions have been reported - 5,612,692,411 cycles # 2.721 GHz - 13,825,461,651 instructions # 2.46 insn per cycle - 2.063599543 seconds time elapsed + 6,031,690,352 cycles # 2.742 GHz + 13,823,991,565 instructions # 2.29 insn per cycle + 2.200855114 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -186,31 +186,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.168393e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.189788e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.189788e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.160431e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.182355e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.182355e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.795888 sec +TOTAL : 1.796714 sec INFO: No Floating Point Exceptions have been reported - 4,923,088,043 cycles # 2.737 GHz - 12,506,721,234 instructions # 2.54 insn per cycle - 1.799844991 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) + 4,915,575,489 cycles # 2.731 GHz + 12,505,831,482 instructions # 2.54 insn per cycle + 1.801025403 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -218,31 +218,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] 
('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.949785e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.962226e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.962226e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.864510e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.876687e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.876687e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.367179 sec +TOTAL : 2.395867 sec INFO: No Floating Point Exceptions have been reported - 4,144,948,568 cycles # 1.750 GHz - 6,391,796,529 instructions # 1.54 insn per cycle - 2.371216811 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) + 4,162,633,573 cycles # 1.735 GHz + 6,392,322,352 instructions # 1.54 insn per cycle + 2.400290914 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -250,8 +250,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 80da0089a3..724af1477d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:16:25 +DATE: 2024-09-18_12:16:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.297695e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.326014e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.328135e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.313099e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.338786e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.340713e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.536243 sec +TOTAL : 0.536981 sec INFO: No Floating Point Exceptions have been reported - 2,198,395,650 cycles # 2.855 GHz - 3,395,768,128 instructions # 1.54 insn per cycle - 0.828484590 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 + 2,207,441,775 cycles # 2.862 GHz + 3,435,949,472 instructions # 1.56 insn per cycle + 0.828882621 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.139738e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.170223e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.171507e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.143532e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.173218e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.174465e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.043962 sec +TOTAL : 3.036857 sec INFO: No Floating Point Exceptions have been reported - 9,646,114,898 cycles # 2.920 GHz - 22,170,499,370 instructions # 2.30 insn per cycle - 3.361578134 seconds time elapsed + 9,590,647,679 cycles # 2.910 GHz + 22,042,753,111 instructions # 2.30 insn per cycle + 3.351786655 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.884927e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.885834e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.885834e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.884971e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.885850e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.885850e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.707889 sec +TOTAL : 8.706995 sec INFO: No Floating Point Exceptions have been reported - 25,619,332,595 cycles # 2.941 GHz - 78,702,929,908 instructions # 3.07 insn per cycle - 8.712420077 seconds time elapsed + 25,617,517,247 cycles # 2.941 GHz + 78,701,000,615 instructions # 3.07 insn per cycle + 8.711338338 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4191) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 
128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.574307e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.577560e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.577560e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.566075e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.569356e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.569356e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.595188 sec +TOTAL : 4.605078 sec INFO: No Floating Point Exceptions have been reported - 13,048,399,086 cycles # 2.838 GHz - 39,450,691,251 instructions # 3.02 insn per cycle - 4.599210719 seconds time elapsed + 13,036,001,618 cycles # 2.829 GHz + 39,449,493,817 instructions # 3.03 insn per cycle + 4.609408106 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:12966) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.930739e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.946422e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.946422e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.966836e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.982546e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.982546e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.074898 sec +TOTAL : 2.064889 sec INFO: No Floating Point Exceptions have been reported - 5,675,338,380 cycles # 2.732 GHz - 13,910,840,784 instructions # 2.45 insn per cycle - 2.079006346 seconds time elapsed + 5,676,808,859 cycles # 2.745 GHz + 13,911,294,100 instructions # 2.45 insn per cycle + 2.069253381 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11582) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 
256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.062486e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.083766e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.083766e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.081065e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.102389e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.102389e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.816561 sec +TOTAL : 1.812330 sec INFO: No Floating Point Exceptions have been reported - 4,996,440,015 cycles # 2.746 GHz - 12,603,390,155 instructions # 2.52 insn per cycle - 1.820566072 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10423) (512y: 240) (512z: 0) + 4,986,765,093 cycles # 2.746 GHz + 12,602,417,777 instructions # 2.53 insn per cycle + 1.816710814 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10423) (512y: 241) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 
512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.965367e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.977715e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.977715e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.944688e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.956851e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.956851e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.361480 sec +TOTAL : 2.368181 sec INFO: No Floating Point Exceptions have been reported - 4,159,091,159 cycles # 1.759 GHz - 6,499,576,244 instructions # 1.56 insn per cycle - 2.365402468 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1751) (512y: 194) (512z: 9382) + 4,157,079,693 cycles # 1.753 GHz + 6,500,343,598 instructions # 1.56 insn per cycle + 2.372472342 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1754) (512y: 193) (512z: 9382) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index a149b91e1f..9c62ee596f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:36:57 +DATE: 2024-09-18_12:45:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.106076e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.131267e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.133080e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.107911e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.129674e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.131105e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.544695 sec +TOTAL : 0.542394 sec INFO: No Floating Point Exceptions have been reported - 2,219,428,600 cycles # 2.858 GHz - 3,493,527,234 instructions # 1.57 insn per cycle - 0.834499500 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 + 2,239,231,771 cycles # 2.882 GHz + 3,498,325,403 instructions # 1.56 insn per cycle + 0.833980513 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.753348e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.778591e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.779655e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.758554e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.783710e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.784685e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.312449 sec +TOTAL : 3.309606 sec INFO: No Floating Point Exceptions have been reported - 10,421,899,087 cycles # 2.913 GHz - 24,058,421,553 instructions # 2.31 insn per cycle - 3.636091022 seconds time elapsed + 10,428,358,714 cycles # 2.922 GHz + 23,876,781,455 instructions # 2.29 insn per cycle + 3.623869439 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158122E-004 Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.268976e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.269450e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.269450e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.278929e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.279396e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.279396e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 38.423779 sec +TOTAL : 38.335228 sec INFO: No Floating Point Exceptions have been reported - 112,730,268,623 cycles # 2.934 GHz - 144,772,135,406 instructions # 1.28 insn per cycle - 38.427951659 seconds time elapsed + 112,569,296,340 cycles # 2.936 GHz + 144,793,904,773 instructions # 1.29 insn per cycle + 38.339626690 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:21273) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140461E-004 Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 
128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.077363e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.079742e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.079742e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.146613e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.149188e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.149188e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.335511 sec +TOTAL : 5.218296 sec INFO: No Floating Point Exceptions have been reported - 14,752,370,812 cycles # 2.763 GHz - 37,645,694,563 instructions # 2.55 insn per cycle - 5.339828429 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68253) (avx2: 0) (512y: 0) (512z: 0) + 14,745,365,482 cycles # 2.824 GHz + 37,604,718,701 instructions # 2.55 insn per cycle + 5.222619147 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68172) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141209E-004 Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.332306e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.345602e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.345602e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.373915e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.387237e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.387237e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.243210 sec +TOTAL : 2.230777 sec INFO: No Floating Point Exceptions have been reported - 6,130,370,628 cycles # 2.729 GHz - 13,060,931,234 instructions # 2.13 insn per cycle - 2.247428561 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46973) (512y: 0) (512z: 0) + 6,114,551,945 cycles # 2.737 GHz + 13,052,964,850 instructions # 2.13 insn per cycle + 2.235150749 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46946) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 
256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.812729e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.832105e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.832105e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.869797e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.889489e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.889489e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.867280 sec +TOTAL : 1.855437 sec INFO: No Floating Point Exceptions have been reported - 5,063,580,201 cycles # 2.707 GHz - 11,453,397,200 instructions # 2.26 insn per cycle - 1.871531437 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40498) (512y: 285) (512z: 0) + 5,079,069,827 cycles # 2.732 GHz + 11,450,297,808 instructions # 2.25 insn per cycle + 1.859852844 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40486) (512y: 285) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 
512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.266726e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.280024e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.280024e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.334322e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.348410e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.348410e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.263638 sec +TOTAL : 2.242689 sec INFO: No Floating Point Exceptions have been reported - 3,957,788,966 cycles # 1.746 GHz - 5,926,468,977 instructions # 1.50 insn per cycle - 2.267826067 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2444) (512y: 337) (512z:39349) + 3,955,754,497 cycles # 1.761 GHz + 5,927,045,148 instructions # 1.50 insn per cycle + 2.247181135 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2444) (512y: 337) (512z:39338) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index c0add05aa1..af0b172ab7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:38:07 +DATE: 2024-09-18_12:46:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.096121e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.123086e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.125058e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.101802e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.121265e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.122962e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.541972 sec +TOTAL : 0.540843 sec INFO: No Floating Point Exceptions have been reported - 2,219,963,696 cycles # 2.870 GHz - 3,470,909,979 instructions # 1.56 insn per cycle - 0.830712751 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 + 2,223,817,024 cycles # 2.871 GHz + 3,385,583,234 instructions # 1.52 insn per cycle + 0.831231377 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.756387e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.782287e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.783295e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.740756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765684e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.766660e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.298693 sec +TOTAL : 3.304935 sec INFO: No Floating Point Exceptions have been reported - 10,399,316,447 cycles # 2.922 GHz - 23,584,057,660 instructions # 2.27 insn per cycle - 3.614068267 seconds time elapsed + 10,396,192,831 cycles # 2.917 GHz + 23,795,713,123 instructions # 2.29 insn per cycle + 3.619511438 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158122E-004 Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.224460e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.224899e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.224899e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.220488e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.220945e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.220945e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 38.827526 sec +TOTAL : 38.865543 sec INFO: No Floating Point Exceptions have been reported - 113,783,414,735 cycles # 2.930 GHz - 144,278,309,276 instructions # 1.27 insn per cycle - 38.831628591 seconds time elapsed + 114,075,746,984 cycles # 2.935 GHz + 144,284,837,728 instructions # 1.26 insn per cycle + 38.869913276 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:21024) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140450E-004 Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.989108e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.991357e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.991357e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.002635e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.004951e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.004951e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.492626 sec +TOTAL : 5.467988 sec INFO: No Floating Point Exceptions have been reported - 15,275,599,565 cycles # 2.780 GHz - 38,389,599,156 instructions # 2.51 insn per cycle - 5.496788286 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69643) (avx2: 0) (512y: 0) (512z: 0) + 15,296,909,197 cycles # 2.796 GHz + 37,837,176,497 instructions # 2.47 insn per cycle + 5.472337784 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68594) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141209E-004 Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.497881e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.512338e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.512338e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.512966e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.527080e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.527080e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.193501 sec +TOTAL : 2.189426 sec INFO: No Floating Point Exceptions have been reported - 6,019,122,923 cycles # 2.740 GHz - 12,933,620,431 instructions # 2.15 insn per cycle - 2.197765722 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46099) (512y: 0) (512z: 0) + 6,002,714,707 cycles # 2.737 GHz + 12,921,820,063 instructions # 2.15 insn per cycle + 2.193921042 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46048) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 
256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.839318e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.859960e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.859960e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.859047e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.878725e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.878725e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.861821 sec +TOTAL : 1.857736 sec INFO: No Floating Point Exceptions have been reported - 5,093,783,286 cycles # 2.731 GHz - 11,449,481,812 instructions # 2.25 insn per cycle - 1.866150033 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40142) (512y: 219) (512z: 0) + 5,096,589,479 cycles # 2.738 GHz + 11,450,886,914 instructions # 2.25 insn per cycle + 1.862161811 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40151) (512y: 219) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 
512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.279822e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.293417e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.293417e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.316370e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.329769e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.329769e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.259516 sec +TOTAL : 2.247849 sec INFO: No Floating Point Exceptions have been reported - 3,958,337,222 cycles # 1.750 GHz - 5,889,113,860 instructions # 1.49 insn per cycle - 2.263750575 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1959) (512y: 259) (512z:38927) + 3,953,949,727 cycles # 1.756 GHz + 5,894,038,279 instructions # 1.49 insn per cycle + 2.252346875 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1959) (512y: 259) (512z:38977) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index f53bdfcb06..90e270bc8d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:16:59 +DATE: 2024-09-18_12:17:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.467249e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.509285e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.513718e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.485010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.524901e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.528717e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.494218 sec +TOTAL : 0.493784 sec INFO: No Floating Point Exceptions have been reported - 2,034,103,432 cycles # 2.860 GHz - 3,045,186,386 instructions # 1.50 insn per cycle - 0.768364063 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 + 2,047,150,406 cycles # 2.872 GHz + 3,017,206,545 instructions # 1.47 insn per cycle + 0.769521849 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.128844e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.190571e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.193248e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.130872e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.191030e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.193752e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.796967 sec +TOTAL : 1.801872 sec INFO: No Floating Point Exceptions have been reported - 5,948,917,067 cycles # 2.924 GHz - 12,254,957,631 instructions # 2.06 insn per cycle - 2.089755272 seconds time elapsed + 5,918,779,581 cycles # 2.909 GHz + 12,693,441,452 instructions # 2.14 insn per cycle + 2.093566853 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262667672387088E-004 Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.939424e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.940380e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.940380e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.942960e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.943920e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.943920e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.461205 sec +TOTAL : 8.446218 sec INFO: No Floating Point Exceptions have been reported - 24,939,277,475 cycles # 2.947 GHz - 79,109,068,255 instructions # 3.17 insn per cycle - 8.465315543 seconds time elapsed + 24,891,970,806 cycles # 2.946 GHz + 79,110,184,615 instructions # 3.18 insn per cycle + 8.450517031 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.989306e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.001573e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.001573e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.000853e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.014105e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.014105e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.351266 sec +TOTAL : 2.347412 sec INFO: No Floating Point Exceptions have been reported - 6,525,064,847 cycles # 2.771 GHz - 20,269,487,959 instructions # 3.11 insn per cycle - 2.355049106 seconds time elapsed + 6,535,913,878 cycles # 2.780 GHz + 20,270,850,285 instructions # 3.10 insn per cycle + 2.351723425 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.582613e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.589051e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.589051e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.599290e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.605892e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.605892e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.041194 sec +TOTAL : 1.030594 sec INFO: No Floating Point Exceptions have been reported - 2,848,829,047 cycles # 2.729 GHz - 7,065,493,216 instructions # 2.48 insn per cycle - 1.044894531 seconds time elapsed + 2,836,963,276 cycles # 2.743 GHz + 7,065,994,832 instructions # 2.49 insn per cycle + 1.034860296 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 
256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.794003e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.802231e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.802231e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.795295e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.803482e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.803482e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.919231 sec +TOTAL : 0.918514 sec INFO: No Floating Point Exceptions have been reported - 2,522,001,135 cycles # 2.735 GHz - 6,403,495,458 instructions # 2.54 insn per cycle - 0.923373159 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) + 2,528,652,589 cycles # 2.743 GHz + 6,403,959,518 instructions # 2.53 insn per cycle + 0.922696206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 
512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.403418e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.408437e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.408437e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.410082e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.415209e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.415209e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.173408 sec +TOTAL : 1.168400 sec INFO: No Floating Point Exceptions have been reported - 2,065,585,282 cycles # 1.756 GHz - 3,303,212,083 instructions # 1.60 insn per cycle - 1.177101647 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) + 2,072,435,771 cycles # 1.768 GHz + 3,304,546,208 instructions # 1.59 insn per cycle + 1.172720772 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 99ccf0b7c6..c66db7ae78 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:48:48 +DATE: 2024-09-18_12:55:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.945945e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.468849e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.468849e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.970193e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.498612e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.498612e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.482042 sec +TOTAL : 0.479926 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,997,062,353 cycles # 2.864 GHz - 3,031,546,242 instructions # 1.52 insn per cycle - 0.755384112 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge + 2,011,379,748 cycles # 2.884 GHz + 3,038,247,862 instructions # 1.51 insn per cycle + 0.753810180 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -71,7 +71,7 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -80,18 +80,18 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.954589e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.017029e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.017029e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.940879e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.083233e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.083233e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 -TOTAL : 2.150287 sec +TOTAL : 1.970426 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 6,911,046,669 cycles # 2.894 GHz - 9,720,301,924 instructions # 1.41 insn per cycle - 2.447262326 seconds time elapsed + 6,440,275,548 cycles # 2.913 GHz + 13,287,281,132 instructions # 2.06 insn per cycle + 2.267959957 seconds time elapsed 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -99,35 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262667672387088E-004 Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.933142e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.934088e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.934088e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.936854e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.937817e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.937817e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.491300 sec +TOTAL : 8.475178 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 24,914,436,852 cycles # 2.933 GHz - 79,112,976,787 instructions # 3.18 insn per cycle - 8.495346137 seconds time elapsed + 24,927,059,080 cycles # 2.940 GHz + 79,118,119,354 instructions # 3.17 insn per cycle + 8.479535627 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -135,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.985457e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.998623e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.998623e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.002962e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.015509e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.015509e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.355396 sec +TOTAL : 2.349551 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 6,536,388,186 cycles # 2.771 GHz - 20,278,657,318 instructions # 3.10 insn per cycle - 2.359340287 seconds time elapsed + 6,544,667,804 cycles # 2.781 GHz + 20,279,974,113 instructions # 3.10 insn per cycle + 2.353974711 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -169,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.588469e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.595113e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.595113e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.603853e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.610574e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.610574e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.040170 sec +TOTAL : 1.030576 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,854,653,705 cycles # 2.736 GHz - 7,075,192,119 instructions # 2.48 insn per cycle - 1.044116961 seconds time elapsed + 2,847,456,615 cycles # 2.753 GHz + 7,075,989,633 instructions # 2.49 insn per cycle + 1.035024707 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -203,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.764583e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.772721e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.772721e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.785349e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.793696e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.793696e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.937436 sec +TOTAL : 0.926691 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,529,004,808 cycles # 2.688 GHz - 6,413,196,189 instructions # 2.54 insn per cycle - 0.941494819 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) + 2,540,934,134 cycles # 2.731 GHz + 6,413,438,200 instructions # 2.52 insn per cycle + 0.931148836 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -237,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.393518e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.398724e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.398724e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.400821e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.405962e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.405962e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.184860 sec +TOTAL : 1.179178 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,077,752,907 cycles # 1.749 GHz - 3,313,647,639 instructions # 1.59 insn per cycle - 1.188846310 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) + 2,081,047,712 cycles # 1.760 GHz + 3,314,864,763 instructions # 1.59 insn per cycle + 1.183503546 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -271,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 19f64c3e7a..3aa8ed158e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:59:51 +DATE: 2024-09-18_13:06:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.517186e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.553165e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.556664e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.472678e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.513045e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.517154e+05 ) sec^-1 MeanMatrixElemValue = ( 4.159396e-01 +- 3.238803e-01 ) GeV^-4 -TOTAL : 0.477606 sec +TOTAL : 0.480872 sec INFO: No Floating Point Exceptions have been reported - 1,989,569,741 cycles # 2.873 GHz - 3,005,042,417 instructions # 1.51 insn per cycle - 0.749585148 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common + 2,012,103,314 cycles # 2.880 GHz + 2,956,061,319 instructions # 1.47 insn per cycle + 0.756135044 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.132207e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.190283e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.192869e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.032542e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.093526e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.096446e+05 ) sec^-1 MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.892599 sec +TOTAL : 1.887302 sec INFO: No Floating Point Exceptions have been reported - 6,180,255,032 cycles # 2.913 GHz - 13,158,154,431 instructions # 2.13 insn per cycle - 2.179693271 seconds time elapsed + 6,151,892,700 cycles # 2.911 GHz + 12,903,540,079 instructions # 2.10 insn per cycle + 2.177167582 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262667672387088E-004 Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.919400e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.920348e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.920348e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.942290e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.943248e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.943248e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.550896 sec +TOTAL : 8.449460 sec INFO: No Floating Point Exceptions have been reported - 24,917,761,266 cycles # 2.921 GHz - 79,107,928,249 instructions # 3.17 insn per cycle - 8.554412617 seconds time elapsed + 24,927,677,850 cycles # 2.949 GHz + 79,113,674,015 instructions # 3.17 insn per cycle + 8.453509271 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.947395e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.959971e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.959971e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.966325e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.979405e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.979405e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.367170 sec +TOTAL : 2.360667 sec INFO: No Floating Point Exceptions have been reported - 6,537,639,637 cycles # 2.759 GHz - 20,270,199,231 instructions # 3.10 insn per cycle - 2.370801699 seconds time elapsed + 6,536,812,483 cycles # 2.766 GHz + 20,271,244,947 instructions # 3.10 insn per cycle + 2.364721005 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.589719e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.596352e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.596352e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.594039e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.600583e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.600583e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 1.038242 sec +TOTAL : 1.034746 sec INFO: No Floating Point Exceptions have been reported - 2,854,773,942 cycles # 2.742 GHz - 7,065,309,093 instructions # 2.47 insn per cycle - 1.041774606 seconds time elapsed + 2,840,398,673 cycles # 2.736 GHz + 7,064,163,701 instructions # 2.49 insn per cycle + 1.038926233 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] 
('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.793297e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.801693e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.801693e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.789304e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.797829e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.797829e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.921472 sec +TOTAL : 0.922641 sec INFO: No Floating Point Exceptions have been reported - 2,523,779,273 cycles # 2.730 GHz - 6,401,399,707 instructions # 2.54 insn per cycle - 0.925110369 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) + 2,530,877,890 cycles # 2.733 GHz + 6,400,607,448 instructions # 2.53 insn per cycle + 0.926747674 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] 
('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.398357e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.403401e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.403401e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.398241e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.403280e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.403280e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.179617 sec +TOTAL : 1.179849 sec INFO: No Floating Point Exceptions have been reported - 2,071,965,297 cycles # 1.751 GHz - 3,301,502,867 instructions # 1.59 insn per cycle - 1.184374263 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) + 2,072,557,863 cycles # 1.752 GHz + 3,302,114,927 instructions # 1.59 insn per cycle + 1.183970001 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 71166778fc..383503bdc9 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:56:57 +DATE: 2024-09-18_13:03:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.530906e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.566446e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.569964e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.456336e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.498999e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.503203e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.476038 sec +TOTAL : 0.480678 sec INFO: No Floating Point Exceptions have been reported - 1,985,362,661 cycles # 2.871 GHz - 2,993,731,363 instructions # 1.51 insn per cycle - 0.748126319 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst + 1,979,230,420 cycles # 2.841 GHz + 2,971,475,191 instructions # 1.50 insn per cycle + 0.754740237 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.141583e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.198514e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.201079e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.127471e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.189319e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.192138e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.826652 sec +TOTAL : 1.829247 sec INFO: No Floating Point Exceptions have been reported - 5,952,227,239 cycles # 2.897 GHz - 12,995,171,990 instructions # 2.18 insn per cycle - 2.112711437 seconds time elapsed + 6,015,541,951 cycles # 2.921 GHz + 13,102,927,362 instructions # 2.18 insn per cycle + 2.117747585 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262667672387088E-004 Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.933291e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.934237e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.934237e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.933929e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934853e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934853e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.487946 sec +TOTAL : 8.485517 sec INFO: No Floating Point Exceptions have been reported - 24,924,562,928 cycles # 2.936 GHz - 79,109,269,886 instructions # 3.17 insn per cycle - 8.491506444 seconds time elapsed + 24,920,337,024 cycles # 2.936 GHz + 79,110,004,323 instructions # 3.17 insn per cycle + 8.489672562 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.966222e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.978443e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.978443e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.908881e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.921968e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.921968e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.359225 sec +TOTAL : 2.378636 sec INFO: No Floating Point Exceptions have been reported - 6,526,176,570 cycles # 2.763 GHz - 20,269,541,830 instructions # 3.11 insn per cycle - 2.362842375 seconds time elapsed + 6,534,965,408 cycles # 2.745 GHz + 20,270,944,694 instructions # 3.10 insn per cycle + 2.382810423 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.543496e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.549592e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.549592e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.596709e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.603236e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.603236e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.067538 sec +TOTAL : 1.032159 sec INFO: No Floating Point Exceptions have been reported - 2,860,867,267 cycles # 2.672 GHz - 7,065,461,760 instructions # 2.47 insn per cycle - 1.071393478 seconds time elapsed + 2,835,702,890 cycles # 2.738 GHz + 7,066,012,611 instructions # 2.49 insn per cycle + 1.036273546 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] 
('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.800483e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.808998e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.808998e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.794868e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.803140e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.803140e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.915759 sec +TOTAL : 0.918666 sec INFO: No Floating Point Exceptions have been reported - 2,516,887,683 cycles # 2.739 GHz - 6,403,177,488 instructions # 2.54 insn per cycle - 0.919482910 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) + 2,525,424,797 cycles # 2.739 GHz + 6,403,502,842 instructions # 2.54 insn per cycle + 0.922861156 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] 
('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.400235e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.405247e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.405247e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.410052e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.415143e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.415143e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.176437 sec +TOTAL : 1.168246 sec INFO: No Floating Point Exceptions have been reported - 2,065,991,158 cycles # 1.752 GHz - 3,303,729,120 instructions # 1.60 insn per cycle - 1.180108788 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) + 2,068,179,462 cycles # 1.765 GHz + 3,303,875,484 instructions # 1.60 insn per cycle + 1.172542272 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index dd6ac10521..7797c46a19 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,76 +11,76 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:54:10 +DATE: 2024-09-18_13:00:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.065242e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.543001e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.546426e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.992477e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.494287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.498231e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.481262 sec +TOTAL : 0.481963 sec INFO: No Floating Point Exceptions have been reported - 1,996,897,335 cycles # 2.872 GHz - 2,970,634,149 instructions # 1.49 insn per cycle - 0.754034663 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst + 1,997,965,324 cycles # 2.853 GHz + 2,939,834,102 instructions # 1.47 insn per cycle + 0.757193064 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.141287e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.200996e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.203446e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.118190e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.192549e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.195361e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 -TOTAL : 1.902964 sec +TOTAL : 1.905617 sec INFO: No Floating Point Exceptions have been reported - 6,190,673,302 cycles # 2.901 GHz - 13,306,269,630 instructions # 2.15 insn per cycle - 2.189368892 seconds time elapsed + 6,204,653,970 cycles # 2.904 GHz + 11,932,036,366 instructions # 1.92 insn per cycle + 2.194579719 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -88,33 +88,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262667672387088E-004 Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst 
OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.933487e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.934407e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.934407e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.934899e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.935850e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935850e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.487231 sec +TOTAL : 8.481229 sec INFO: No Floating Point Exceptions have been reported - 24,900,546,223 cycles # 2.933 GHz - 79,107,234,987 instructions # 3.18 insn per cycle - 8.490956598 seconds time elapsed + 24,933,908,474 cycles # 2.939 GHz + 79,109,779,876 instructions # 3.17 insn per cycle + 8.485474778 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -122,31 +122,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.983704e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.996217e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.996217e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.954399e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.967143e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.967143e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.353085 sec +TOTAL : 2.363281 sec INFO: No Floating Point Exceptions have been reported - 6,541,995,614 cycles # 2.777 GHz - 20,269,407,860 instructions # 3.10 insn per cycle - 2.356873297 seconds time elapsed + 6,535,222,026 cycles # 2.761 GHz + 20,271,091,445 instructions # 3.10 insn per cycle + 2.367564480 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -154,31 +154,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.585450e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.591953e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.591953e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.592187e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.598658e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.598658e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.039372 sec +TOTAL : 1.035113 sec INFO: No Floating Point Exceptions have been reported - 2,850,375,088 cycles # 2.735 GHz - 7,065,899,998 instructions # 2.48 insn per cycle - 1.043028953 seconds time elapsed + 2,837,322,925 cycles # 2.732 GHz + 7,065,851,947 instructions # 2.49 insn per cycle + 1.039614272 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -186,31 +186,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] 
('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.792341e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.800787e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.800787e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.786472e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.794657e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.794657e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.920003 sec +TOTAL : 0.923192 sec INFO: No Floating Point Exceptions have been reported - 2,517,551,147 cycles # 2.728 GHz - 6,403,207,803 instructions # 2.54 insn per cycle - 0.923687532 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) + 2,528,197,649 cycles # 2.730 GHz + 6,403,497,083 instructions # 2.53 insn per cycle + 0.927414591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -218,31 +218,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] 
('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.402198e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.407285e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.407285e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.394144e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.399234e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.399234e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.174725 sec +TOTAL : 1.181792 sec INFO: No Floating Point Exceptions have been reported - 2,068,104,176 cycles # 1.756 GHz - 3,303,725,822 instructions # 1.60 insn per cycle - 1.178407380 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) + 2,068,985,618 cycles # 1.745 GHz + 3,303,850,767 instructions # 1.60 insn per cycle + 1.186123644 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -250,8 +250,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index 0807d31ee5..9b731718b7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:17:26 +DATE: 2024-09-18_12:17:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.482391e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.527725e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.532031e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.454590e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.492804e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.497193e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.493760 sec +TOTAL : 0.495704 sec INFO: No Floating Point Exceptions have been reported - 2,047,228,800 cycles # 2.877 GHz - 3,039,242,832 instructions # 1.48 insn per cycle - 0.768472979 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 + 2,032,995,153 cycles # 2.848 GHz + 2,991,224,667 instructions # 1.47 insn per cycle + 0.774166376 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.102852e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.164567e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.167207e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.094149e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.154905e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.157610e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.802499 sec +TOTAL : 1.805319 sec INFO: No Floating Point Exceptions have been reported - 5,908,213,594 cycles # 2.909 GHz - 12,456,477,911 instructions # 2.11 insn per cycle - 2.093284072 seconds time elapsed + 5,914,324,613 cycles # 2.902 GHz + 11,873,756,543 instructions # 2.01 insn per cycle + 2.096430893 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262667672387088E-004 Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.936218e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.937180e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.937180e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.929536e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.930480e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.930480e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.475402 sec +TOTAL : 8.504728 sec INFO: No Floating Point Exceptions have been reported - 24,949,332,764 cycles # 2.943 GHz - 78,839,555,653 instructions # 3.16 insn per cycle - 8.479529977 seconds time elapsed + 25,015,654,943 cycles # 2.941 GHz + 78,847,702,433 instructions # 3.15 insn per cycle + 8.508857223 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627487e-04 Avg ME (F77/C++) = 6.6274866250177339E-004 Relative difference = 5.65798569465384e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.122699e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.135567e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.135567e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.178831e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.192718e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.192718e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.306947 sec +TOTAL : 2.289338 sec INFO: No Floating Point Exceptions have been reported - 6,466,639,499 cycles # 2.800 GHz - 20,230,851,658 instructions # 3.13 insn per cycle - 2.310638118 seconds time elapsed + 6,463,318,702 cycles # 2.819 GHz + 20,229,880,790 instructions # 3.13 insn per cycle + 2.293529801 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13491) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861448331612E-004 Relative difference = 2.1853408865157068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.507818e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.513887e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.513887e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.520587e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.526569e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.526569e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.092466 sec +TOTAL : 1.083432 sec INFO: No Floating Point Exceptions have been reported - 2,980,915,950 cycles # 2.722 GHz - 7,206,628,057 instructions # 2.42 insn per cycle - 1.096222389 seconds time elapsed + 2,984,403,957 cycles # 2.746 GHz + 7,207,167,499 instructions # 2.41 insn per cycle + 1.087697042 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12437) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271939668088170E-004 Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.724603e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.732183e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.732183e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.733677e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.741677e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.741677e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.955874 sec +TOTAL : 0.950819 sec INFO: No Floating Point Exceptions have been reported - 2,613,667,112 cycles # 2.726 GHz - 6,544,516,026 instructions # 2.50 insn per cycle - 0.959652526 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11449) (512y: 26) (512z: 0) + 2,611,989,316 cycles # 2.737 GHz + 6,545,448,351 instructions # 2.51 insn per cycle + 0.954971597 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11449) (512y: 27) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271939668088170E-004 Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 
512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.352025e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.356715e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.356715e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.366907e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.371833e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.371833e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.218129 sec +TOTAL : 1.204851 sec INFO: No Floating Point Exceptions have been reported - 2,137,040,914 cycles # 1.750 GHz - 3,460,849,319 instructions # 1.62 insn per cycle - 1.221974093 seconds time elapsed + 2,138,789,905 cycles # 1.770 GHz + 3,461,611,954 instructions # 1.62 insn per cycle + 1.209183599 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3037) (512y: 25) (512z: 9677) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952032316561E-004 Relative difference = 3.066631594207157e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 507a64eed8..2cbba9e698 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:39:16 +DATE: 2024-09-18_12:47:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.567838e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.605874e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.609566e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.579593e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.616784e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.620542e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.496412 sec +TOTAL : 0.508462 sec INFO: No Floating Point Exceptions have been reported - 2,051,587,227 cycles # 2.873 GHz - 3,025,794,403 instructions # 1.47 insn per cycle - 0.774558823 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 + 2,050,083,222 cycles # 2.848 GHz + 2,995,129,166 instructions # 1.46 insn per cycle + 0.787145254 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.651284e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.721094e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.724249e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.605413e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.675177e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.678183e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.731680 sec +TOTAL : 1.737190 sec INFO: No Floating Point Exceptions have been reported - 5,770,677,421 cycles # 2.916 GHz - 12,010,283,008 instructions # 2.08 insn per cycle - 2.035197700 seconds time elapsed + 5,761,752,713 cycles # 2.921 GHz + 12,131,218,179 instructions # 2.11 insn per cycle + 2.028782459 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262669162351490E-004 Relative difference = 2.8232862531213374e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.459828e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.460600e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.460600e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.602317e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.603102e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.603102e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 30.042280 sec +TOTAL : 29.279245 sec INFO: No Floating Point Exceptions have been reported - 86,122,252,676 cycles # 2.867 GHz - 135,657,307,138 instructions # 1.58 insn per cycle - 30.046456599 seconds time elapsed + 85,920,999,170 cycles # 2.934 GHz + 135,650,935,446 instructions # 1.58 insn per cycle + 29.283501695 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:15856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 Avg ME (F77/C++) = 6.6275349717465765E-004 Relative difference = 4.26303654465793e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.672428e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.686393e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.686393e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.859267e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.871489e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.871489e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.462988 sec +TOTAL : 2.395975 sec INFO: No Floating Point Exceptions have been reported - 6,758,193,786 cycles # 2.742 GHz - 19,357,772,182 instructions # 2.86 insn per cycle - 2.467248153 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69591) (avx2: 0) (512y: 0) (512z: 0) + 6,767,487,912 cycles # 2.821 GHz + 19,352,953,840 instructions # 2.86 insn per cycle + 2.400276342 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69577) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274862748188362E-004 Relative difference = 4.14665283800746e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.362305e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.367046e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.367046e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.427993e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.433168e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.433168e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.208685 sec +TOTAL : 1.153582 sec INFO: No Floating Point Exceptions have been reported - 3,166,621,827 cycles # 2.612 GHz - 6,792,444,940 instructions # 2.15 insn per cycle - 1.212802697 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49012) (512y: 0) (512z: 0) + 3,172,176,609 cycles # 2.741 GHz + 6,794,912,676 instructions # 2.14 insn per cycle + 1.157865028 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49034) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731568543797E-004 Relative difference = 2.3668012430631962e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 
256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.652877e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.659885e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.659885e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.725737e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.733579e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.733579e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 0.997146 sec +TOTAL : 0.955483 sec INFO: No Floating Point Exceptions have been reported - 2,625,468,482 cycles # 2.624 GHz - 5,970,509,824 instructions # 2.27 insn per cycle - 1.001249505 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42601) (512y: 11) (512z: 0) + 2,630,257,808 cycles # 2.742 GHz + 5,970,030,267 instructions # 2.27 insn per cycle + 0.959792623 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42602) (512y: 11) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731568543797E-004 Relative difference = 2.3668012430631962e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 
512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.322992e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.327409e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.327409e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.398705e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.403700e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.403700e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.244528 sec +TOTAL : 1.177833 sec INFO: No Floating Point Exceptions have been reported - 2,076,691,772 cycles # 1.664 GHz - 3,494,505,327 instructions # 1.68 insn per cycle - 1.248709350 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5207) (512y: 3) (512z:44836) + 2,074,489,030 cycles # 1.756 GHz + 3,495,482,745 instructions # 1.68 insn per cycle + 1.182176144 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5208) (512y: 3) (512z:44858) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627275e-04 Avg ME (F77/C++) = 6.6272750237027223E-004 Relative difference = 3.5765412974815996e-09 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index c027e74779..307c9cbde7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:40:08 +DATE: 2024-09-18_12:48:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.562972e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.598287e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.601913e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.556326e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.594247e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.598112e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.492421 sec +TOTAL : 0.495550 sec INFO: No Floating Point Exceptions have been reported - 2,010,223,902 cycles # 2.826 GHz - 3,031,193,233 instructions # 1.51 insn per cycle - 0.770287796 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 + 2,046,506,588 cycles # 2.866 GHz + 3,036,453,126 instructions # 1.48 insn per cycle + 0.773976715 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.689601e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.749985e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.752940e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.676205e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.747820e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.750770e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.732807 sec +TOTAL : 1.731569 sec INFO: No Floating Point Exceptions have been reported - 5,640,346,322 cycles # 2.872 GHz - 11,210,275,869 instructions # 1.99 insn per cycle - 2.022037581 seconds time elapsed + 5,750,101,661 cycles # 2.911 GHz + 12,015,194,090 instructions # 2.09 insn per cycle + 2.032327922 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262669162351490E-004 Relative difference = 2.8232862531213374e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.446094e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.446837e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.446837e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.582687e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.583472e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.583472e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 30.118401 sec +TOTAL : 29.381578 sec INFO: No Floating Point Exceptions have been reported - 86,113,084,692 cycles # 2.859 GHz - 135,363,065,912 instructions # 1.57 insn per cycle - 30.122446956 seconds time elapsed + 86,090,574,106 cycles # 2.930 GHz + 135,364,281,032 instructions # 1.57 insn per cycle + 29.385785407 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:15471) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 Avg ME (F77/C++) = 6.6275349662128086E-004 Relative difference = 5.098002770919431e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.516652e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.527742e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.527742e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.781191e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.793019e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.793019e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.521798 sec +TOTAL : 2.423420 sec INFO: No Floating Point Exceptions have been reported - 6,856,870,344 cycles # 2.715 GHz - 19,407,796,379 instructions # 2.83 insn per cycle - 2.529187527 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69622) (avx2: 0) (512y: 0) (512z: 0) + 6,852,713,563 cycles # 2.824 GHz + 19,471,819,479 instructions # 2.84 insn per cycle + 2.427762808 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69876) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274862799683282E-004 Relative difference = 4.2243518621014775e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.378784e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.383778e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.383778e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.462291e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.467817e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.467817e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.194647 sec +TOTAL : 1.126460 sec INFO: No Floating Point Exceptions have been reported - 3,106,911,149 cycles # 2.593 GHz - 6,716,375,817 instructions # 2.16 insn per cycle - 1.199018593 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47699) (512y: 0) (512z: 0) + 3,104,466,483 cycles # 2.747 GHz + 6,715,454,919 instructions # 2.16 insn per cycle + 1.130606631 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47692) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731623419345E-004 Relative difference = 2.449603850635964e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.633831e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.642301e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.642301e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.731919e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.740037e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.740037e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.009236 sec +TOTAL : 0.951895 sec INFO: No Floating Point Exceptions have been reported - 2,628,290,758 cycles # 2.601 GHz - 5,969,462,739 instructions # 2.27 insn per cycle - 1.017917591 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41882) (512y: 13) (512z: 0) + 2,625,337,295 cycles # 2.748 GHz + 5,966,178,470 instructions # 2.27 insn per cycle + 0.956115789 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41858) (512y: 13) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731623419345E-004 Relative difference = 2.449603850635964e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 
512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.325974e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.330533e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.330533e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.400560e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.405624e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.405624e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.242092 sec +TOTAL : 1.176100 sec INFO: No Floating Point Exceptions have been reported - 2,077,381,824 cycles # 1.674 GHz - 3,490,865,426 instructions # 1.68 insn per cycle - 1.248861709 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4171) (512y: 4) (512z:44487) + 2,074,048,907 cycles # 1.758 GHz + 3,487,720,369 instructions # 1.68 insn per cycle + 1.180409639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4171) (512y: 4) (512z:44494) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627275e-04 Avg ME (F77/C++) = 6.6272750247886592E-004 Relative difference = 3.740400032174438e-09 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index f1d40dff2c..9378c125b2 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:17:52 +DATE: 2024-09-18_12:18:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.307071e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.337162e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.339154e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.318122e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.344688e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.346795e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.532508 sec +TOTAL : 0.537854 sec INFO: No Floating Point Exceptions have been reported - 2,203,526,312 cycles # 2.869 GHz - 3,467,986,959 instructions # 1.57 insn per cycle - 0.824379177 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 + 2,221,101,860 cycles # 2.870 GHz + 3,456,789,338 instructions # 1.56 insn per cycle + 0.830636964 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.133497e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.164330e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.165560e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.135476e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.165199e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.166400e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.045403 sec +TOTAL : 3.042010 sec INFO: No Floating Point Exceptions have been reported - 9,651,769,916 cycles # 2.918 GHz - 21,560,396,285 instructions # 2.23 insn per cycle - 3.363407224 seconds time elapsed + 9,635,962,932 cycles # 2.918 GHz + 21,731,646,939 instructions # 2.26 insn per cycle + 3.358385171 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.856718e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.857596e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.857596e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.865433e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.866327e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.866327e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.839571 sec +TOTAL : 8.798260 sec INFO: No Floating Point Exceptions have been reported - 25,916,613,183 cycles # 2.931 GHz - 79,423,792,934 instructions # 3.06 insn per cycle - 8.843857471 seconds time elapsed + 25,923,427,719 cycles # 2.945 GHz + 79,426,669,152 instructions # 3.06 insn per cycle + 8.802604907 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4775) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 
128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.495399e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.498545e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.498545e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.509753e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.512944e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.512944e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.699126 sec +TOTAL : 4.679543 sec INFO: No Floating Point Exceptions have been reported - 12,847,395,150 cycles # 2.733 GHz - 38,826,102,030 instructions # 3.02 insn per cycle - 4.703180057 seconds time elapsed + 12,835,987,651 cycles # 2.741 GHz + 38,823,362,502 instructions # 3.02 insn per cycle + 4.683930656 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13173) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.037182e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.053225e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.053225e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.042437e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.059866e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.059866e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.047520 sec +TOTAL : 2.045674 sec INFO: No Floating Point Exceptions have been reported - 5,598,661,180 cycles # 2.730 GHz - 13,618,631,873 instructions # 2.43 insn per cycle - 2.051512013 seconds time elapsed + 5,599,505,022 cycles # 2.733 GHz + 13,616,194,882 instructions # 2.43 insn per cycle + 2.050016410 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11427) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.221665e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.243251e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.243251e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.300992e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.323362e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.323362e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.785642 sec +TOTAL : 1.769579 sec INFO: No Floating Point Exceptions have been reported - 4,865,374,839 cycles # 2.720 GHz - 12,297,660,832 instructions # 2.53 insn per cycle - 1.789585857 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10331) (512y: 79) (512z: 0) + 4,864,538,423 cycles # 2.743 GHz + 12,294,521,282 instructions # 2.53 insn per cycle + 1.774039102 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10331) (512y: 80) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.872514e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.884400e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.884400e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.972443e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.984642e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.984642e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.393483 sec +TOTAL : 2.358911 sec INFO: No Floating Point Exceptions have been reported - 4,171,721,525 cycles # 1.741 GHz - 6,391,185,056 instructions # 1.53 insn per cycle - 2.397568985 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1980) (512y: 93) (512z: 9360) + 4,168,866,472 cycles # 1.765 GHz + 6,393,098,618 instructions # 1.53 insn per cycle + 2.363390601 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1983) (512y: 92) (512z: 9360) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index ae3635632d..032ee51884 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-15_11:18:27 +DATE: 2024-09-18_12:18:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.320961e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.350573e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.352533e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.323949e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.349755e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.352036e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.532217 sec +TOTAL : 0.534628 sec INFO: No Floating Point Exceptions have been reported - 2,207,479,579 cycles # 2.873 GHz - 3,464,148,832 instructions # 1.57 insn per cycle - 0.824297603 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 + 2,204,767,059 cycles # 2.871 GHz + 3,455,052,131 instructions # 1.57 insn per cycle + 0.826431777 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.148990e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.180422e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.181658e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.145238e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.175049e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176235e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.035739 sec +TOTAL : 3.038517 sec INFO: No Floating Point Exceptions have been reported - 9,612,803,881 cycles # 2.915 GHz - 20,074,302,744 instructions # 2.09 insn per cycle - 3.353532451 seconds time elapsed + 9,654,182,964 cycles # 2.928 GHz + 20,172,707,879 instructions # 2.09 insn per cycle + 3.353606693 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.831511e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.832364e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.832364e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.861444e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.862342e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.862342e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.961804 sec +TOTAL : 8.816790 sec INFO: No Floating Point Exceptions have been reported - 26,010,493,082 cycles # 2.902 GHz - 79,449,384,960 instructions # 3.05 insn per cycle - 8.965752302 seconds time elapsed + 25,987,801,849 cycles # 2.947 GHz + 79,452,087,213 instructions # 3.06 insn per cycle + 8.821027518 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 
128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.477024e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.480127e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.480127e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.513306e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.516455e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.516455e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.723753 sec +TOTAL : 4.674356 sec INFO: No Floating Point Exceptions have been reported - 12,826,084,303 cycles # 2.714 GHz - 38,778,289,694 instructions # 3.02 insn per cycle - 4.727826379 seconds time elapsed + 12,813,296,665 cycles # 2.739 GHz + 38,778,823,155 instructions # 3.03 insn per cycle + 4.678665662 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:12935) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.051767e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.067821e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.067821e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.042911e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.058963e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.058963e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.043694 sec +TOTAL : 2.045213 sec INFO: No Floating Point Exceptions have been reported - 5,591,778,218 cycles # 2.733 GHz - 13,733,552,430 instructions # 2.46 insn per cycle - 2.047665232 seconds time elapsed + 5,589,546,199 cycles # 2.728 GHz + 13,732,854,665 instructions # 2.46 insn per cycle + 2.049788655 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11510) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.123898e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.144357e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.144357e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.106583e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.127720e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.127720e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.804403 sec +TOTAL : 1.807350 sec INFO: No Floating Point Exceptions have been reported - 4,951,573,094 cycles # 2.739 GHz - 12,422,632,916 instructions # 2.51 insn per cycle - 1.808331695 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10322) (512y: 239) (512z: 0) + 4,955,573,408 cycles # 2.736 GHz + 12,423,027,135 instructions # 2.51 insn per cycle + 1.811880023 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10322) (512y: 240) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.884699e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.896731e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.896731e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.875797e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.888202e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.888202e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.389141 sec +TOTAL : 2.391557 sec INFO: No Floating Point Exceptions have been reported - 4,181,828,175 cycles # 1.750 GHz - 6,496,177,989 instructions # 1.55 insn per cycle - 2.393377398 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1803) (512y: 191) (512z: 9369) + 4,183,217,410 cycles # 1.747 GHz + 6,495,987,121 instructions # 1.55 insn per cycle + 2.396041838 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1806) (512y: 190) (512z: 9358) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 407fbbe6c0..7ab313debd 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -21,44 +21,44 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. 
-DATE: 2024-09-15_11:20:21 +DATE: 2024-09-18_12:20:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.059284e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.059685e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.059813e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.053996e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.054389e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.054544e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.451621 sec +TOTAL : 2.439529 sec INFO: No Floating Point Exceptions have been reported - 8,080,887,114 cycles # 2.907 GHz - 16,734,437,330 instructions # 2.07 insn per cycle - 2.836211679 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 + 8,096,284,346 cycles # 2.927 GHz + 17,063,420,790 instructions # 2.11 insn per cycle + 2.826206150 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": 
sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.254596e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.256737e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.256950e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.238045e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.240055e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.240313e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.020216 sec +TOTAL : 4.011196 sec INFO: No Floating Point Exceptions have been reported - 12,719,492,672 cycles # 2.923 GHz - 29,448,097,640 instructions # 2.32 insn per cycle - 4.407436029 seconds time elapsed + 12,704,613,289 cycles # 2.925 GHz + 30,115,204,727 instructions # 2.37 insn per cycle + 4.397191434 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -66,33 +66,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 Avg ME (F77/GPU) = 9.8722595284406640E-003 Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 
OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.610103e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.610301e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.610301e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.572616e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.572823e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.572823e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.935255 sec +TOTAL : 6.975845 sec INFO: No Floating Point Exceptions have been reported - 18,974,774,871 cycles # 2.735 GHz - 53,899,721,094 instructions # 2.84 insn per cycle - 6.939338261 seconds time elapsed + 19,035,417,803 cycles # 2.728 GHz + 53,904,235,908 instructions # 2.83 insn per cycle + 6.980238056 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] 
('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.579226e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.579318e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.579318e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.590030e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.590126e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.590126e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.345304 sec +TOTAL : 3.323749 sec INFO: No Floating Point Exceptions have been reported - 9,800,813,517 cycles # 2.927 GHz - 27,149,189,789 instructions # 2.77 insn per cycle - 3.349514409 seconds time elapsed + 9,780,563,101 cycles # 2.940 GHz + 27,151,089,688 instructions # 2.78 insn per cycle + 3.328023666 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.366336e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.366803e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.366803e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.385331e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.385742e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.385742e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.570292 sec +TOTAL : 1.561733 sec INFO: No Floating Point Exceptions have been reported - 4,287,053,926 cycles # 2.724 GHz - 9,590,127,631 instructions # 2.24 insn per cycle - 1.574599019 seconds time elapsed + 4,266,182,969 cycles # 2.725 GHz + 9,590,975,871 instructions # 2.25 insn per cycle + 1.566018474 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.904765e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.905290e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.905290e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.892057e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.892635e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.892635e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.353979 sec +TOTAL : 1.358913 sec INFO: No Floating Point Exceptions have been reported - 3,709,436,689 cycles # 2.733 GHz - 8,514,247,183 instructions # 2.30 insn per cycle - 1.357880276 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 89) (512z: 0) + 3,729,263,843 cycles # 2.737 GHz + 8,515,569,817 instructions # 2.28 insn per cycle + 1.363199183 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 90) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -196,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': 
AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.407683e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.408196e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.408196e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.395803e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.396338e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.396338e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.552623 sec +TOTAL : 1.556955 sec INFO: No Floating Point Exceptions have been reported - 2,699,560,921 cycles # 1.736 GHz - 4,280,862,154 instructions # 1.59 insn per cycle - 1.556608026 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2853) (512y: 103) (512z:79114) + 2,698,860,839 cycles # 1.729 GHz + 4,282,343,065 instructions # 1.59 insn per cycle + 1.561500058 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2856) (512y: 102) (512z:79114) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -228,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index e032151033..5983376983 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -21,11 +21,11 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. 
-DATE: 2024-09-15_11:49:15 +DATE: 2024-09-18_12:56:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -34,17 +34,17 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.054597e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.057500e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.057500e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.052616e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.054381e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.054381e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.436820 sec +TOTAL : 2.389054 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 8,044,156,582 cycles # 2.913 GHz - 18,167,469,518 instructions # 2.26 insn per cycle - 2.819990438 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge + 7,904,157,038 cycles # 2.912 GHz + 16,771,352,323 instructions # 
2.12 insn per cycle + 2.770325050 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) @@ -52,7 +52,7 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -61,18 +61,18 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.188185e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.221546e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.221546e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.237194e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.272545e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.272545e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.024442 sec +TOTAL : 3.987988 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 12,704,137,155 cycles # 2.918 GHz - 27,467,799,669 instructions # 2.16 insn per cycle - 4.411963692 seconds time elapsed + 12,604,681,487 cycles # 2.919 GHz + 28,965,849,382 instructions # 2.30 insn per cycle + 4.373962640 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -80,35 +80,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 Avg ME (F77/GPU) = 9.8722595284406640E-003 Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 
1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.352982e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.353176e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.353176e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.613542e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.613748e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.613748e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.182913 sec +TOTAL : 6.936083 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 19,533,134,643 cycles # 2.719 GHz - 53,904,822,620 instructions # 2.76 insn per cycle - 7.186820393 seconds time elapsed + 18,900,515,421 cycles # 2.724 GHz + 53,905,451,035 instructions # 2.85 insn per cycle + 6.940621858 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -116,33 +116,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.583220e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.583307e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.583307e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.538785e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.538876e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.538876e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.337581 sec +TOTAL : 3.433615 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 9,779,129,351 cycles # 2.927 GHz - 27,151,664,900 instructions # 2.78 insn per cycle - 3.341583664 seconds time elapsed + 10,052,781,401 cycles # 2.925 GHz + 27,153,872,228 instructions # 2.70 insn per cycle + 3.438126502 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -150,33 +150,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.365450e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.365854e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.365854e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.384986e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.385397e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.385397e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.570834 sec +TOTAL : 1.561732 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,275,074,655 cycles # 2.716 GHz - 9,592,294,661 instructions # 2.24 insn per cycle - 1.574792391 seconds time elapsed + 4,257,385,748 cycles # 2.719 GHz + 9,593,157,745 instructions # 2.25 insn per cycle + 1.566325188 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -184,33 +184,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.876513e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.877048e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.877048e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.887075e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.887680e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.887680e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.364090 sec +TOTAL : 1.360664 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,720,013,902 cycles # 2.721 GHz - 8,517,094,572 instructions # 2.29 insn per cycle - 1.368386654 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 89) (512z: 0) + 3,718,394,007 cycles # 2.725 GHz + 8,517,746,108 instructions # 2.29 insn per cycle + 1.365273931 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 90) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -218,33 +218,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.420108e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.420617e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.420617e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.422958e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.423581e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.423581e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.546895 sec +TOTAL : 1.545411 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,698,104,238 cycles # 1.741 GHz - 4,283,566,876 instructions # 1.59 insn per cycle - 1.551097954 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2853) (512y: 103) (512z:79114) + 2,703,115,511 cycles # 1.745 GHz + 4,284,711,505 instructions # 1.59 insn per cycle + 1.550234745 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2856) (512y: 102) (512z:79114) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -252,8 +252,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 71b1803a4d..6972883511 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -21,44 +21,44 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. 
-DATE: 2024-09-15_11:21:49 +DATE: 2024-09-18_12:22:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.055075e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.055529e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.055650e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.054893e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.055305e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.055482e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.454190 sec +TOTAL : 2.442249 sec INFO: No Floating Point Exceptions have been reported - 8,101,270,896 cycles # 2.912 GHz - 18,320,414,768 instructions # 2.26 insn per cycle - 2.837550341 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 + 8,106,561,725 cycles # 2.931 GHz + 17,204,264,784 instructions # 2.12 insn per cycle + 2.825101828 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": 
sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.224205e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.226444e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.226692e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.195814e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.197984e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.198227e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.029157 sec +TOTAL : 4.015071 sec INFO: No Floating Point Exceptions have been reported - 12,695,828,795 cycles # 2.910 GHz - 28,709,503,011 instructions # 2.26 insn per cycle - 4.420420636 seconds time elapsed + 12,724,131,626 cycles # 2.928 GHz + 29,969,146,046 instructions # 2.36 insn per cycle + 4.400441096 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -66,33 +66,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 Avg ME (F77/GPU) = 9.8722595284406640E-003 Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 
OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.201824e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.202080e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.202080e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.111535e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.111769e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.111769e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.438167 sec +TOTAL : 6.512090 sec INFO: No Floating Point Exceptions have been reported - 18,843,132,149 cycles # 2.926 GHz - 53,928,570,497 instructions # 2.86 insn per cycle - 6.442267111 seconds time elapsed + 18,865,192,455 cycles # 2.896 GHz + 53,932,477,912 instructions # 2.86 insn per cycle + 6.516216407 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:32022) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] 
('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.562611e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.562704e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.562704e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.566187e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.566277e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.566277e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.380180 sec +TOTAL : 3.374379 sec INFO: No Floating Point Exceptions have been reported - 9,918,861,148 cycles # 2.932 GHz - 27,128,280,341 instructions # 2.74 insn per cycle - 3.383996000 seconds time elapsed + 9,914,343,626 cycles # 2.935 GHz + 27,131,823,716 instructions # 2.74 insn per cycle + 3.378885579 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96368) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.368711e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.369114e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.369114e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.354421e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.354826e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.354826e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.568940 sec +TOTAL : 1.575572 sec INFO: No Floating Point Exceptions have been reported - 4,289,720,535 cycles # 2.728 GHz - 9,584,928,513 instructions # 2.23 insn per cycle - 1.573132113 seconds time elapsed + 4,301,534,798 cycles # 2.724 GHz + 9,586,207,937 instructions # 2.23 insn per cycle + 1.579825913 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84968) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.874256e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.874798e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.874798e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.882229e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.882764e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.882764e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.364932 sec +TOTAL : 1.361976 sec INFO: No Floating Point Exceptions have been reported - 3,728,944,037 cycles # 2.726 GHz - 8,507,330,131 instructions # 2.28 insn per cycle - 1.368786926 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80632) (512y: 239) (512z: 0) + 3,732,974,645 cycles # 2.734 GHz + 8,507,919,232 instructions # 2.28 insn per cycle + 1.366219448 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80632) (512y: 240) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -196,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': 
AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.414224e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.414743e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.414743e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.421560e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.422069e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.422069e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.549013 sec +TOTAL : 1.546646 sec INFO: No Floating Point Exceptions have been reported - 2,698,122,905 cycles # 1.738 GHz - 4,280,648,246 instructions # 1.59 insn per cycle - 1.553090413 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 185) (512z:79098) + 2,700,867,753 cycles # 1.742 GHz + 4,281,876,861 instructions # 1.59 insn per cycle + 1.551074701 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2693) (512y: 184) (512z:79098) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -228,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 26694465db..41f4336bf3 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -21,44 +21,44 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. 
-DATE: 2024-09-15_11:23:17 +DATE: 2024-09-18_12:23:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.208704e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.209632e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.209859e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.204897e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.205686e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.206019e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.761935 sec +TOTAL : 1.744512 sec INFO: No Floating Point Exceptions have been reported - 5,908,004,381 cycles # 2.901 GHz - 11,686,361,328 instructions # 1.98 insn per cycle - 2.093948305 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 + 5,890,882,031 cycles # 2.919 GHz + 11,806,932,962 instructions # 2.00 insn per cycle + 2.074529782 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": 
sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.102338e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.102897e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.103014e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.136881e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.137530e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.137618e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 -TOTAL : 2.075560 sec +TOTAL : 2.049597 sec INFO: No Floating Point Exceptions have been reported - 6,795,219,354 cycles # 2.902 GHz - 14,967,758,240 instructions # 2.20 insn per cycle - 2.398428041 seconds time elapsed + 6,759,095,385 cycles # 2.923 GHz + 14,845,205,038 instructions # 2.20 insn per cycle + 2.369358973 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -66,33 +66,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.849635e-03 Avg ME (F77/GPU) = 9.8712451931260159E-003 Relative difference = 0.0021940095370046923 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 
OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.563117e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.563375e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.563375e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.543544e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.543805e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.543805e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.168224 sec +TOTAL : 6.183077 sec INFO: No Floating Point Exceptions have been reported - 18,106,019,929 cycles # 2.934 GHz - 53,907,716,361 instructions # 2.98 insn per cycle - 6.172403776 seconds time elapsed + 18,161,151,116 cycles # 2.936 GHz + 53,910,939,698 instructions # 2.97 insn per cycle + 6.187519652 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 Avg ME (F77/C++) = 9.8479612087551509E-003 Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': 
SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.366569e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.366962e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.366962e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.361492e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.361888e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.361888e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.570259 sec +TOTAL : 1.573076 sec INFO: No Floating Point Exceptions have been reported - 4,597,646,888 cycles # 2.923 GHz - 13,807,163,752 instructions # 3.00 insn per cycle - 1.574045592 seconds time elapsed + 4,616,676,545 cycles # 2.928 GHz + 13,807,548,367 instructions # 2.99 insn per cycle + 1.577368513 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896367235E-003 Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': 
AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.801272e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.802916e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.802916e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.784398e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.786227e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.786227e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.778379 sec +TOTAL : 0.780447 sec INFO: No Floating Point Exceptions have been reported - 2,130,043,758 cycles # 2.726 GHz - 4,836,599,174 instructions # 2.27 insn per cycle - 0.782206721 seconds time elapsed + 2,130,555,516 cycles # 2.717 GHz + 4,837,275,089 instructions # 2.27 insn per cycle + 0.784743576 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091246E-003 Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.682520e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.684604e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.684604e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.634242e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.636553e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.636553e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.688940 sec +TOTAL : 0.693797 sec INFO: No Floating Point Exceptions have been reported - 1,884,507,725 cycles # 2.723 GHz - 4,290,819,235 instructions # 2.28 insn per cycle - 0.692749981 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81185) (512y: 44) (512z: 0) + 1,903,490,036 cycles # 2.729 GHz + 4,291,225,209 instructions # 2.25 insn per cycle + 0.698112096 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81183) (512y: 45) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -196,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091246E-003 Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] 
('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.875530e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.877565e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.877565e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.885404e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.887629e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.887629e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.770700 sec +TOTAL : 0.769903 sec INFO: No Floating Point Exceptions have been reported - 1,352,613,897 cycles # 1.747 GHz - 2,162,405,721 instructions # 1.60 insn per cycle - 0.774947088 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3479) (512y: 47) (512z:79330) + 1,354,371,935 cycles # 1.750 GHz + 2,162,822,545 instructions # 1.60 insn per cycle + 0.774469858 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3481) (512y: 45) (512z:79330) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -228,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982676284E-003 Relative difference = 2.004124217057488e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 8e4037314e..8d8b09449b 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -21,11 +21,11 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. 
-DATE: 2024-09-15_11:50:44 +DATE: 2024-09-18_12:57:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -34,17 +34,17 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.261572e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.268191e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.268191e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.296128e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.300632e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.300632e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187093e-05 +- 9.825663e-06 ) GeV^-6 -TOTAL : 1.738260 sec +TOTAL : 1.691925 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 5,820,188,860 cycles # 2.913 GHz - 12,502,480,728 instructions # 2.15 insn per cycle - 2.056507800 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge + 5,650,162,983 cycles # 2.892 GHz + 11,596,549,862 instructions # 
2.05 insn per cycle + 2.010258263 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) @@ -52,7 +52,7 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -61,18 +61,18 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.148842e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.160493e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.160493e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.106225e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.117844e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.117844e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856440e-04 +- 8.331091e-05 ) GeV^-6 -TOTAL : 2.045649 sec +TOTAL : 2.039868 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 6,753,066,902 cycles # 2.917 GHz - 14,813,097,918 instructions # 2.19 insn per cycle - 2.374262766 seconds time elapsed + 6,704,150,880 cycles # 2.913 GHz + 14,933,981,007 instructions # 2.23 insn per cycle + 2.357689511 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -80,35 +80,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.849635e-03 Avg ME (F77/GPU) = 9.8712451931260159E-003 Relative difference = 0.0021940095370046923 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 
1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.502637e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.502889e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.502889e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.476123e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.476381e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.476381e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.211468 sec +TOTAL : 6.231080 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 18,143,644,137 cycles # 2.921 GHz - 53,909,939,321 instructions # 2.97 insn per cycle - 6.215559174 seconds time elapsed + 18,168,605,946 cycles # 2.914 GHz + 53,913,151,543 instructions # 2.97 insn per cycle + 6.235604617 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -116,33 +116,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 Avg ME (F77/C++) = 9.8479612087551509E-003 Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.339632e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.340031e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.340031e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.367327e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.367745e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.367745e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.583158 sec +TOTAL : 1.570359 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,644,642,721 cycles # 2.928 GHz - 13,808,855,992 instructions # 2.97 insn per cycle - 1.587116749 seconds time elapsed + 4,609,204,013 cycles # 2.928 GHz + 13,810,618,137 instructions # 3.00 insn per cycle + 1.574904752 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -150,33 +150,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896367235E-003 Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.786207e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.787843e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.787843e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.813057e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.814753e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.814753e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.780530 sec +TOTAL : 0.777334 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,130,238,055 cycles # 2.718 GHz - 4,838,587,482 instructions # 2.27 insn per cycle - 0.784611119 seconds time elapsed + 2,130,492,369 cycles # 2.727 GHz + 4,838,939,909 instructions # 2.27 insn per cycle + 0.781848874 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -184,33 +184,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091246E-003 Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.698223e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.700507e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.700507e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.672152e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.674283e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.674283e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.688171 sec +TOTAL : 0.690681 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,885,276,472 cycles # 2.726 GHz - 4,293,094,440 instructions # 2.28 insn per cycle - 0.692122848 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81185) (512y: 44) (512z: 0) + 1,888,040,180 cycles # 2.718 GHz + 4,293,435,273 instructions # 2.27 insn per cycle + 0.695178892 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81183) (512y: 45) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -218,33 +218,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091246E-003 Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.810636e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.812720e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.812720e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.827455e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.829435e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.829435e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.777981 sec +TOTAL : 0.776050 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,355,130,660 cycles # 1.735 GHz - 2,164,600,762 instructions # 1.60 insn per cycle - 0.782043846 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3479) (512y: 47) (512z:79330) + 1,356,992,115 cycles # 1.740 GHz + 2,165,171,343 instructions # 1.60 insn per cycle + 0.780688696 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3481) (512y: 45) (512z:79330) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -252,8 +252,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982676284E-003 Relative difference = 2.004124217057488e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 113bcaacf7..43e4fd4779 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -21,44 +21,44 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. 
-DATE: 2024-09-15_11:24:20 +DATE: 2024-09-18_12:24:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.202287e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.203031e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.203251e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.195253e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.195989e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.196280e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.759878 sec +TOTAL : 1.748628 sec INFO: No Floating Point Exceptions have been reported - 5,921,315,429 cycles # 2.907 GHz - 12,451,469,321 instructions # 2.10 insn per cycle - 2.095417433 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 + 5,866,556,695 cycles # 2.917 GHz + 12,565,857,650 instructions # 2.14 insn per cycle + 2.067294353 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": 
sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.113173e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.113784e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.113870e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.121566e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.122211e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.122327e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 -TOTAL : 2.088774 sec +TOTAL : 2.053021 sec INFO: No Floating Point Exceptions have been reported - 6,829,574,271 cycles # 2.905 GHz - 14,898,722,914 instructions # 2.18 insn per cycle - 2.410171422 seconds time elapsed + 6,778,897,896 cycles # 2.924 GHz + 14,985,250,436 instructions # 2.21 insn per cycle + 2.374125707 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -66,33 +66,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.849635e-03 Avg ME (F77/GPU) = 9.8712451931260107E-003 Relative difference = 0.0021940095370041636 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 
OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.526309e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.526569e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.526569e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.587070e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.587332e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.587332e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.192756 sec +TOTAL : 6.151381 sec INFO: No Floating Point Exceptions have been reported - 18,135,421,902 cycles # 2.927 GHz - 53,892,650,631 instructions # 2.97 insn per cycle - 6.196840431 seconds time elapsed + 18,055,403,744 cycles # 2.934 GHz + 53,896,033,902 instructions # 2.99 insn per cycle + 6.155606485 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 Avg ME (F77/C++) = 9.8479612087572898E-003 Relative difference = 2.1198021522715588e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': 
SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.396709e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.397124e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.397124e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.398632e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.399059e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.399059e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.556067 sec +TOTAL : 1.555535 sec INFO: No Floating Point Exceptions have been reported - 4,573,398,855 cycles # 2.934 GHz - 13,800,378,388 instructions # 3.02 insn per cycle - 1.559827589 seconds time elapsed + 4,569,755,461 cycles # 2.931 GHz + 13,800,747,699 instructions # 3.02 insn per cycle + 1.559859354 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96651) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896065809E-003 Relative difference = 3.151856596628469e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': 
AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.651495e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.653049e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.653049e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.803652e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.805665e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.805665e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.795401 sec +TOTAL : 0.778546 sec INFO: No Floating Point Exceptions have been reported - 2,148,860,867 cycles # 2.691 GHz - 4,840,602,339 instructions # 2.25 insn per cycle - 0.799229981 seconds time elapsed + 2,147,523,686 cycles # 2.745 GHz + 4,840,927,245 instructions # 2.25 insn per cycle + 0.782889882 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85884) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091923E-003 Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.688407e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.690576e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.690576e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.693768e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.696106e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.696106e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.688541 sec +TOTAL : 0.688038 sec INFO: No Floating Point Exceptions have been reported - 1,890,706,185 cycles # 2.733 GHz - 4,294,394,779 instructions # 2.27 insn per cycle - 0.692328039 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81725) (512y: 24) (512z: 0) + 1,894,736,849 cycles # 2.739 GHz + 4,295,025,191 instructions # 2.27 insn per cycle + 0.692237484 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81725) (512y: 25) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -196,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091923E-003 Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': 
AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.826093e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.828148e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.828148e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.859865e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.862153e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.862153e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.775828 sec +TOTAL : 0.772052 sec INFO: No Floating Point Exceptions have been reported - 1,357,390,482 cycles # 1.742 GHz - 2,169,212,126 instructions # 1.60 insn per cycle - 0.779795742 seconds time elapsed + 1,359,092,301 cycles # 1.753 GHz + 2,169,957,409 instructions # 1.60 insn per cycle + 0.776490041 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4092) (512y: 32) (512z:79551) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -228,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982957326E-003 Relative difference = 2.0044082998332894e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 2e59aa2257..e02407d644 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -21,44 +21,44 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. 
-DATE: 2024-09-15_11:25:23 +DATE: 2024-09-18_12:25:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.663841e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.664390e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.664590e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.664550e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.665186e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.665405e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.203630 sec +TOTAL : 2.193141 sec INFO: No Floating Point Exceptions have been reported - 7,260,397,959 cycles # 2.866 GHz - 15,031,707,879 instructions # 2.07 insn per cycle - 2.589013700 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 + 7,365,717,542 cycles # 2.923 GHz + 16,291,118,073 instructions # 2.21 insn per cycle + 2.576836591 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": 
sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.107763e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.108067e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.108098e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.102923e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.103231e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103265e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.442022 sec +TOTAL : 3.419785 sec INFO: No Floating Point Exceptions have been reported - 10,932,120,354 cycles # 2.895 GHz - 24,906,946,249 instructions # 2.28 insn per cycle - 3.831975982 seconds time elapsed + 10,963,927,138 cycles # 2.923 GHz + 24,861,261,596 instructions # 2.27 insn per cycle + 3.806537159 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -66,33 +66,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 Avg ME (F77/GPU) = 9.8722599015656498E-003 Relative difference = 3.1385249252060663e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 
OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.516129e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.516327e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.516327e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.500673e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.500867e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.500867e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.025685 sec +TOTAL : 7.041188 sec INFO: No Floating Point Exceptions have been reported - 19,256,305,943 cycles # 2.740 GHz - 54,130,622,749 instructions # 2.81 insn per cycle - 7.029878997 seconds time elapsed + 19,221,485,171 cycles # 2.729 GHz + 54,134,690,618 instructions # 2.82 insn per cycle + 7.045507456 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:32000) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': 
SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.524890e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.524973e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.524973e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.537074e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.537163e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.537163e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.464849 sec +TOTAL : 3.437220 sec INFO: No Floating Point Exceptions have been reported - 9,453,784,509 cycles # 2.726 GHz - 26,186,103,091 instructions # 2.77 insn per cycle - 3.468732831 seconds time elapsed + 9,396,080,919 cycles # 2.731 GHz + 26,188,082,836 instructions # 2.79 insn per cycle + 3.441517756 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96049) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': 
AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.508306e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.508754e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.508754e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.541134e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.541635e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.541635e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.507182 sec +TOTAL : 1.493238 sec INFO: No Floating Point Exceptions have been reported - 4,099,795,192 cycles # 2.715 GHz - 9,249,955,249 instructions # 2.26 insn per cycle - 1.510975685 seconds time elapsed + 4,077,957,635 cycles # 2.724 GHz + 9,249,641,886 instructions # 2.27 insn per cycle + 1.497708781 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.116819e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.117442e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.117442e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.136665e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.137271e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.137271e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.284411 sec +TOTAL : 1.279311 sec INFO: No Floating Point Exceptions have been reported - 3,509,716,252 cycles # 2.725 GHz - 8,182,475,258 instructions # 2.33 insn per cycle - 1.288638878 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80015) (512y: 79) (512z: 0) + 3,517,339,720 cycles # 2.742 GHz + 8,183,228,052 instructions # 2.33 insn per cycle + 1.283633317 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80015) (512y: 80) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -196,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': 
AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.462021e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.462537e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.462537e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.501058e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.501647e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.501647e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.527279 sec +TOTAL : 1.511075 sec INFO: No Floating Point Exceptions have been reported - 2,661,319,941 cycles # 1.739 GHz - 4,172,569,565 instructions # 1.57 insn per cycle - 1.531717386 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2612) (512y: 93) (512z:78910) + 2,666,286,599 cycles # 1.760 GHz + 4,173,044,119 instructions # 1.57 insn per cycle + 1.515586960 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2615) (512y: 92) (512z:78910) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -228,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index f2e4a2151c..59afbf5683 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -21,44 +21,44 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. 
-DATE: 2024-09-15_11:26:49 +DATE: 2024-09-18_12:27:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.668216e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.668742e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.668891e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.673618e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.674137e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.674360e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.204926 sec +TOTAL : 2.187725 sec INFO: No Floating Point Exceptions have been reported - 7,354,907,186 cycles # 2.903 GHz - 15,835,326,846 instructions # 2.15 insn per cycle - 2.589353613 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 + 7,320,649,548 cycles # 2.912 GHz + 16,262,382,237 instructions # 2.22 insn per cycle + 2.571114049 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": 
sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.111109e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.111413e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.111447e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.105826e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.106139e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106173e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.435805 sec +TOTAL : 3.426824 sec INFO: No Floating Point Exceptions have been reported - 11,002,728,447 cycles # 2.923 GHz - 25,822,053,923 instructions # 2.35 insn per cycle - 3.822280777 seconds time elapsed + 11,001,686,020 cycles # 2.930 GHz + 25,147,468,300 instructions # 2.29 insn per cycle + 3.812692076 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -66,33 +66,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 Avg ME (F77/GPU) = 9.8722599015656498E-003 Relative difference = 3.1385249252060663e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 
OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.824002e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.824211e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.824211e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.043178e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.043403e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.043403e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.748238 sec +TOTAL : 6.566514 sec INFO: No Floating Point Exceptions have been reported - 19,286,477,225 cycles # 2.857 GHz - 54,157,907,603 instructions # 2.81 insn per cycle - 6.752432065 seconds time elapsed + 19,176,347,779 cycles # 2.919 GHz + 54,156,968,111 instructions # 2.82 insn per cycle + 6.570813145 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:32202) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': 
SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.548001e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.548086e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.548086e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.555217e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.555303e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.555303e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.412561 sec +TOTAL : 3.398105 sec INFO: No Floating Point Exceptions have been reported - 9,302,368,855 cycles # 2.723 GHz - 26,085,336,117 instructions # 2.80 insn per cycle - 3.416771061 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95938) (avx2: 0) (512y: 0) (512z: 0) + 9,273,027,189 cycles # 2.726 GHz + 26,087,136,722 instructions # 2.81 insn per cycle + 3.402445291 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95935) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': 
AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.533570e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.534051e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.534051e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.537227e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.537679e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.537679e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.496264 sec +TOTAL : 1.494127 sec INFO: No Floating Point Exceptions have been reported - 4,086,923,304 cycles # 2.726 GHz - 9,212,952,806 instructions # 2.25 insn per cycle - 1.500090267 seconds time elapsed + 4,071,118,335 cycles # 2.719 GHz + 9,214,803,224 instructions # 2.26 insn per cycle + 1.498443184 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83864) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.068352e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.068931e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.068931e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.138433e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.139090e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.139090e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.299365 sec +TOTAL : 1.277732 sec INFO: No Floating Point Exceptions have been reported - 3,513,960,907 cycles # 2.698 GHz - 8,167,668,326 instructions # 2.32 insn per cycle - 1.303235401 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79421) (512y: 229) (512z: 0) + 3,507,535,748 cycles # 2.738 GHz + 8,168,319,774 instructions # 2.33 insn per cycle + 1.282049677 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79421) (512y: 230) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -196,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': 
AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.521239e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.521794e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.521794e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.543576e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.544114e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.544114e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.500932 sec +TOTAL : 1.493048 sec INFO: No Floating Point Exceptions have been reported - 2,617,549,535 cycles # 1.740 GHz - 4,166,941,618 instructions # 1.59 insn per cycle - 1.504880250 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1876) (512y: 175) (512z:78884) + 2,621,670,941 cycles # 1.752 GHz + 4,167,760,475 instructions # 1.59 insn per cycle + 1.497511330 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1879) (512y: 174) (512z:78884) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -228,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 73af5e5b3a..b0413f07b6 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_11:19:01 +DATE: 2024-09-18_12:19:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.740481e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.765338e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.886154e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.879954e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.891707e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.001488e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.459950 sec +TOTAL : 0.459044 sec INFO: No Floating Point Exceptions have been reported - 1,934,018,539 cycles # 2.861 GHz - 2,739,518,446 instructions # 1.42 insn per cycle - 0.734446139 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 + 1,939,663,698 cycles # 2.864 GHz + 2,747,739,655 instructions # 1.42 insn per cycle + 0.734387710 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.975676e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.474629e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.695966e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.061438e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.512391e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.741979e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.539678 sec +TOTAL : 0.541789 sec INFO: No Floating Point Exceptions have been reported - 2,257,806,163 cycles # 2.877 GHz - 3,239,125,642 instructions # 1.43 insn per cycle - 0.841027050 seconds time elapsed + 2,258,330,350 cycles # 2.885 GHz + 3,233,524,764 instructions # 1.43 insn per cycle + 0.842114758 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482467490466 Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.056428e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.078475e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.078475e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.056900e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.079897e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.079897e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.570596 sec +TOTAL : 1.570278 sec INFO: No Floating Point Exceptions have been reported - 4,620,202,435 cycles # 2.935 GHz - 13,190,173,768 instructions # 2.85 insn per cycle - 1.574765138 seconds time elapsed + 4,626,289,546 cycles # 2.939 GHz + 13,191,201,959 instructions # 2.85 insn per cycle + 1.574568894 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.870844e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 1.942105e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.942105e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.877819e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.949205e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.949205e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.895190 sec +TOTAL : 0.891875 sec INFO: No Floating Point Exceptions have been reported - 2,640,894,010 cycles # 2.940 GHz - 7,556,112,587 instructions # 2.86 insn per cycle - 0.899078617 seconds time elapsed + 2,638,327,743 cycles # 2.947 GHz + 7,555,209,951 instructions # 2.86 insn per cycle + 0.896114078 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.155420e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
3.359383e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.359383e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.170773e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.377039e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.377039e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.539606 sec +TOTAL : 0.536262 sec INFO: No Floating Point Exceptions have been reported - 1,490,717,557 cycles # 2.746 GHz - 3,161,146,919 instructions # 2.12 insn per cycle - 0.543466540 seconds time elapsed + 1,489,383,659 cycles # 2.759 GHz + 3,159,296,473 instructions # 2.12 insn per cycle + 0.540558254 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.514709e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 3.763624e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.763624e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.529419e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.784986e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.784986e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.485581 sec +TOTAL : 0.483576 sec INFO: No Floating Point Exceptions have been reported - 1,345,992,067 cycles # 2.752 GHz - 3,013,895,719 instructions # 2.24 insn per cycle - 0.489750963 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2752) (512y: 104) (512z: 0) + 1,345,705,641 cycles # 2.762 GHz + 3,013,816,668 instructions # 2.24 insn per cycle + 0.487835073 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2749) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.329309e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 2.438411e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.438411e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.357874e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.470306e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.470306e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.725847 sec +TOTAL : 0.716743 sec INFO: No Floating Point Exceptions have been reported - 1,326,647,346 cycles # 1.820 GHz - 1,963,906,161 instructions # 1.48 insn per cycle - 0.729744934 seconds time elapsed + 1,329,087,485 cycles # 1.845 GHz + 1,962,911,490 instructions # 1.48 insn per cycle + 0.721045759 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 87049bf6bc..e338aa0c83 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_11:47:47 +DATE: 2024-09-18_12:54:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.302816e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.642797e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.642797e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.300988e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.591479e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.591479e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.484911 sec +TOTAL : 0.483517 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,993,081,726 cycles # 2.857 GHz - 2,967,605,428 instructions # 1.49 insn per cycle - 0.755486323 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge + 2,018,637,546 cycles # 2.889 GHz + 3,002,221,313 instructions # 1.49 insn per cycle + 0.755433693 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -71,7 +71,7 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -80,18 +80,18 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.256871e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.326938e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.326938e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.209513e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.250583e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.250583e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.759153 sec +TOTAL : 0.757854 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,900,800,476 cycles # 2.878 GHz - 4,476,324,954 instructions # 1.54 insn per cycle - 1.066391830 seconds time elapsed + 2,924,491,267 cycles # 2.893 GHz + 4,472,331,439 instructions # 1.53 insn per cycle + 1.067931667 seconds time elapsed 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -99,35 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482467490466 Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.051618e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.074674e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.074674e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.054217e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.077389e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.077389e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.585397 sec +TOTAL : 1.581825 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,659,791,475 cycles # 2.933 GHz - 13,199,729,048 instructions # 2.83 insn per cycle - 1.589552076 seconds time elapsed + 4,664,515,506 cycles # 2.942 GHz + 13,198,020,525 instructions # 2.83 insn per cycle + 1.586342613 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -135,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.863646e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.935223e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.935223e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.861182e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934526e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934526e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.905908 sec +TOTAL : 0.908066 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,672,075,267 cycles # 2.939 GHz - 7,605,973,490 instructions # 2.85 insn per cycle - 0.909977972 seconds time elapsed + 2,683,422,373 cycles # 2.942 GHz + 7,604,693,273 instructions # 2.83 insn per cycle + 0.912668086 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -169,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.113398e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.317707e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.317707e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.136463e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.344918e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.344918e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.554094 sec +TOTAL : 0.550693 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,524,354,103 cycles # 2.734 GHz - 3,211,905,393 instructions # 2.11 insn per cycle - 0.558166519 seconds time elapsed + 1,532,887,808 cycles # 2.763 GHz + 3,210,306,872 instructions # 2.09 insn per cycle + 0.555384102 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -203,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.488860e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.737446e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.737446e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.483226e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.741231e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.741231e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.497012 sec +TOTAL : 0.498787 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,381,887,844 cycles # 2.761 GHz - 3,066,710,334 instructions # 2.22 insn per cycle - 0.501143809 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2752) (512y: 104) (512z: 0) + 1,390,412,454 cycles # 2.766 GHz + 3,064,189,434 instructions # 2.20 insn per cycle + 0.503409402 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2749) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -237,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.170464e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.268423e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.268423e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.324425e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.438930e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.438930e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.785429 sec +TOTAL : 0.734309 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,369,203,746 cycles # 1.799 GHz - 2,005,266,999 instructions # 1.46 insn per cycle - 0.789533436 seconds time elapsed + 1,369,927,300 cycles # 1.856 GHz + 2,000,629,444 instructions # 1.46 insn per cycle + 0.738870915 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -271,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index f184fc3b5e..698af75849 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_11:19:15 +DATE: 2024-09-18_12:19:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.732857e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.764454e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.875095e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.870113e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.853917e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.966787e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.459022 sec +TOTAL : 0.459366 sec INFO: No Floating Point Exceptions have been reported - 1,935,515,541 cycles # 2.868 GHz - 2,740,568,582 instructions # 1.42 insn per cycle - 0.732282850 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 + 1,939,416,729 cycles # 2.875 GHz + 2,719,660,225 instructions # 1.40 insn per cycle + 0.733418344 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.938986e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.388894e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.605968e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.000378e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.373625e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.579737e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.543627 sec +TOTAL : 0.544100 sec INFO: No Floating Point Exceptions have been reported - 2,240,911,931 cycles # 2.849 GHz - 3,134,508,527 instructions # 1.40 insn per cycle - 0.843804985 seconds time elapsed + 2,268,193,870 cycles # 2.866 GHz + 3,228,698,159 instructions # 1.42 insn per cycle + 0.849845463 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482467490466 Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.033389e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.055879e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.055879e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.061980e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.085030e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.085030e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.606028 sec +TOTAL : 1.562513 sec INFO: No Floating Point Exceptions have been reported - 4,632,143,331 cycles # 2.878 GHz - 13,180,119,009 instructions # 2.85 insn per cycle - 1.610268805 seconds time elapsed + 4,622,072,256 cycles # 2.951 GHz + 13,179,636,938 instructions # 2.85 insn per cycle + 1.566824554 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.829571e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 1.900649e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.900649e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.876350e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.948368e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.948368e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.915734 sec +TOTAL : 0.892202 sec INFO: No Floating Point Exceptions have been reported - 2,643,771,941 cycles # 2.877 GHz - 7,554,150,292 instructions # 2.86 insn per cycle - 0.919868185 seconds time elapsed + 2,639,628,239 cycles # 2.947 GHz + 7,552,826,806 instructions # 2.86 insn per cycle + 0.896585147 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.046256e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
3.248866e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.248866e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.183448e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.393646e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.393646e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.558915 sec +TOTAL : 0.534141 sec INFO: No Floating Point Exceptions have been reported - 1,500,616,577 cycles # 2.669 GHz - 3,161,167,766 instructions # 2.11 insn per cycle - 0.563154837 seconds time elapsed + 1,491,163,611 cycles # 2.773 GHz + 3,158,625,928 instructions # 2.12 insn per cycle + 0.538404277 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2976) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.429175e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 3.674618e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.674618e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.492000e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.744364e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.744364e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.498371 sec +TOTAL : 0.489086 sec INFO: No Floating Point Exceptions have been reported - 1,352,614,614 cycles # 2.696 GHz - 3,013,058,203 instructions # 2.23 insn per cycle - 0.502370936 seconds time elapsed + 1,346,762,343 cycles # 2.733 GHz + 3,011,186,186 instructions # 2.24 insn per cycle + 0.493386881 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2726) (512y: 104) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.263352e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 2.370712e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.370712e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.331076e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.442354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.442354e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.746831 sec +TOTAL : 0.724419 sec INFO: No Floating Point Exceptions have been reported - 1,330,812,654 cycles # 1.774 GHz - 1,962,138,478 instructions # 1.47 insn per cycle - 0.751010006 seconds time elapsed + 1,327,007,586 cycles # 1.823 GHz + 1,960,723,409 instructions # 1.48 insn per cycle + 0.728619129 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1356) (512y: 106) (512z: 2218) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 9c9085f218..8a6bb74f5e 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_11:19:29 +DATE: 2024-09-18_12:20:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.616183e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.859144e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.009356e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.830452e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.999598e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.147092e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.455344 sec +TOTAL : 0.454704 sec INFO: No Floating Point Exceptions have been reported - 1,903,769,704 cycles # 2.832 GHz - 2,695,127,426 instructions # 1.42 insn per cycle - 0.728806465 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 + 1,916,653,758 cycles # 2.859 GHz + 2,706,744,679 instructions # 1.41 insn per cycle + 0.728210028 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.292895e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.269503e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.615665e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.474236e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.587297e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.949656e+07 ) sec^-1 MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 -TOTAL : 0.493521 sec +TOTAL : 0.490292 sec INFO: No Floating Point Exceptions have been reported - 2,090,898,444 cycles # 2.835 GHz - 2,942,471,441 instructions # 1.41 insn per cycle - 0.794240657 seconds time elapsed + 2,078,449,093 cycles # 2.886 GHz + 2,974,210,572 instructions # 1.43 insn per cycle + 0.777065481 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424226e-01 Avg ME (F77/GPU) = 0.14247487904286338 Relative difference = 0.0003670698531228044 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.085317e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.110333e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.110333e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.100218e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.125308e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.125308e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.528688 sec +TOTAL : 1.507385 sec INFO: No Floating Point Exceptions have been reported - 4,411,922,721 cycles # 2.879 GHz - 12,951,312,387 instructions # 2.94 insn per cycle - 1.532844163 seconds time elapsed + 4,410,101,975 cycles # 2.919 GHz + 12,953,085,822 instructions # 2.94 insn per cycle + 1.511568329 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246861273719524 Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.813599e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 2.988360e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.988360e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.885848e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.067058e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.067058e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.600071 sec +TOTAL : 0.584913 sec INFO: No Floating Point Exceptions have been reported - 1,729,759,970 cycles # 2.867 GHz - 4,541,750,353 instructions # 2.63 insn per cycle - 0.604044137 seconds time elapsed + 1,727,797,245 cycles # 2.936 GHz + 4,541,987,860 instructions # 2.63 insn per cycle + 0.589023498 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246862329122401 Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.481903e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
6.160699e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.160699e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.703055e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.396540e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.396540e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.317183 sec +TOTAL : 0.305122 sec INFO: No Floating Point Exceptions have been reported - 858,921,512 cycles # 2.679 GHz - 1,917,766,555 instructions # 2.23 insn per cycle - 0.321171597 seconds time elapsed + 856,571,449 cycles # 2.776 GHz + 1,917,826,981 instructions # 2.24 insn per cycle + 0.309207440 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.857503e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 6.629025e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.629025e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.972249e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.763699e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.763699e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.298118 sec +TOTAL : 0.292353 sec INFO: No Floating Point Exceptions have been reported - 804,518,989 cycles # 2.670 GHz - 1,834,610,739 instructions # 2.28 insn per cycle - 0.301964643 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3402) (512y: 22) (512z: 0) + 806,013,891 cycles # 2.724 GHz + 1,834,284,908 instructions # 2.28 insn per cycle + 0.296525539 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3400) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.365317e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 4.786659e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.786659e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.507099e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.952644e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.952644e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.395762 sec +TOTAL : 0.383525 sec INFO: No Floating Point Exceptions have been reported - 728,663,796 cycles # 1.826 GHz - 1,308,267,192 instructions # 1.80 insn per cycle - 0.399787635 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1963) (512y: 26) (512z: 2434) + 728,616,899 cycles # 1.883 GHz + 1,308,760,783 instructions # 1.80 insn per cycle + 0.387733440 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 24) (512z: 2435) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491576758442 Relative difference = 1.1066920862943416e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index f23dffbec1..a6b985fae9 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_11:48:01 +DATE: 2024-09-18_12:54:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.986387e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.435739e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.435739e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.927791e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.333519e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.333519e+07 ) sec^-1 MeanMatrixElemValue = ( 2.017654e+01 +- 1.429183e+01 ) GeV^-2 -TOTAL : 0.462861 sec +TOTAL : 0.469351 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,938,742,838 cycles # 2.865 GHz - 2,865,087,008 instructions # 1.48 insn per cycle - 0.732940290 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge + 1,981,672,619 cycles # 2.860 GHz + 2,838,745,918 instructions # 1.43 insn per cycle + 0.751805905 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -71,7 +71,7 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -80,18 +80,18 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.036027e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.082450e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.082450e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.989037e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.963677e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.963677e+07 ) sec^-1 MeanMatrixElemValue = ( 2.609941e+02 +- 2.115589e+02 ) GeV^-2 -TOTAL : 0.632974 sec +TOTAL : 0.635748 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,495,936,905 cycles # 2.880 GHz - 3,785,157,902 instructions # 1.52 insn per cycle - 0.923834641 seconds time elapsed + 2,515,384,416 cycles # 2.885 GHz + 3,805,896,684 instructions # 1.51 insn per cycle + 0.928757267 seconds time elapsed 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -99,35 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424226e-01 Avg ME (F77/GPU) = 0.14247487904286338 Relative difference = 0.0003670698531228044 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.104272e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.129547e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.129547e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.110811e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.136583e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.136583e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.505088 sec +TOTAL : 1.496763 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,422,322,267 cycles # 2.932 GHz - 12,955,751,055 instructions # 2.93 insn per cycle - 1.509164533 seconds time elapsed + 4,424,063,947 cycles # 2.949 GHz + 12,956,460,167 instructions # 2.93 insn per cycle + 1.501035221 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -135,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246861273719524 Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.849156e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.028095e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.028095e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.857646e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.036818e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.036818e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.596878 sec +TOTAL : 0.595633 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,746,639,368 cycles # 2.911 GHz - 4,590,056,426 instructions # 2.63 insn per cycle - 0.600772729 seconds time elapsed + 1,753,185,847 cycles # 2.926 GHz + 4,590,460,046 instructions # 2.62 insn per cycle + 0.599868062 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -169,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246862329122401 Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.482456e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.156915e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.156915e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.498095e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.167392e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.167392e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.321160 sec +TOTAL : 0.320525 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 875,522,703 cycles # 2.698 GHz - 1,954,476,479 instructions # 2.23 insn per cycle - 0.325091323 seconds time elapsed + 879,877,577 cycles # 2.713 GHz + 1,955,191,246 instructions # 2.22 insn per cycle + 0.324936571 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -203,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.960780e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.751467e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.751467e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.017893e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.823832e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.823832e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.296758 sec +TOTAL : 0.294540 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 821,090,022 cycles # 2.738 GHz - 1,871,468,752 instructions # 2.28 insn per cycle - 0.300472695 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3402) (512y: 22) (512z: 0) + 824,659,177 cycles # 2.764 GHz + 1,871,065,231 instructions # 2.27 insn per cycle + 0.298923642 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3400) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -237,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.492830e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.932362e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.932362e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.488254e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.923976e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.923976e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.388889 sec +TOTAL : 0.389655 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 746,594,357 cycles # 1.904 GHz - 1,349,630,324 instructions # 1.81 insn per cycle - 0.392744433 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1963) (512y: 26) (512z: 2434) + 750,952,234 cycles # 1.909 GHz + 1,350,104,124 instructions # 1.80 insn per cycle + 0.394048329 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 24) (512z: 2435) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -271,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491576758442 Relative difference = 1.1066920862943416e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index e2521e45b2..67763acaac 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_11:19:41 +DATE: 2024-09-18_12:20:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.639867e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.865475e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.015127e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.836197e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.010594e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.149037e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.454979 sec +TOTAL : 0.455895 sec INFO: No Floating Point Exceptions have been reported - 1,901,448,560 cycles # 2.824 GHz - 2,678,183,164 instructions # 1.41 insn per cycle - 0.730072764 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 + 1,937,517,385 cycles # 2.882 GHz + 2,695,733,072 instructions # 1.39 insn per cycle + 0.731352438 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.246644e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.993104e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.325490e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.416288e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.368760e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.717095e+07 ) sec^-1 MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 -TOTAL : 0.496812 sec +TOTAL : 0.494521 sec INFO: No Floating Point Exceptions have been reported - 2,083,867,357 cycles # 2.832 GHz - 2,934,470,565 instructions # 1.41 insn per cycle - 0.792648211 seconds time elapsed + 2,101,577,521 cycles # 2.872 GHz + 2,967,805,317 instructions # 1.41 insn per cycle + 0.790688389 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424226e-01 Avg ME (F77/GPU) = 0.14247487904286338 Relative difference = 0.0003670698531228044 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.078136e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.103342e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.103342e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.109320e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.134422e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.134422e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.538413 sec +TOTAL : 1.495018 sec INFO: No Floating Point Exceptions have been reported - 4,411,092,348 cycles # 2.861 GHz - 12,926,836,759 instructions # 2.93 insn per cycle - 1.542610115 seconds time elapsed + 4,406,318,830 cycles # 2.941 GHz + 12,927,562,871 instructions # 2.93 insn per cycle + 1.499121241 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 630) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246861273719524 Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.816554e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 2.994652e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.994652e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.896108e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.081157e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.081157e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.599654 sec +TOTAL : 0.582933 sec INFO: No Floating Point Exceptions have been reported - 1,728,903,265 cycles # 2.870 GHz - 4,536,279,042 instructions # 2.62 insn per cycle - 0.603646034 seconds time elapsed + 1,729,684,566 cycles # 2.949 GHz + 4,536,959,704 instructions # 2.62 insn per cycle + 0.587227353 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246862329122401 Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.299874e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
5.938357e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.938357e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.671417e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.388788e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.388788e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.327791 sec +TOTAL : 0.306400 sec INFO: No Floating Point Exceptions have been reported - 861,849,665 cycles # 2.602 GHz - 1,914,633,101 instructions # 2.22 insn per cycle - 0.331876637 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3550) (512y: 0) (512z: 0) + 861,419,707 cycles # 2.779 GHz + 1,914,521,871 instructions # 2.22 insn per cycle + 0.310539350 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3549) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.927651e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 6.715679e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.715679e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.063623e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.871376e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.871376e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.294357 sec +TOTAL : 0.288149 sec INFO: No Floating Point Exceptions have been reported - 802,533,600 cycles # 2.696 GHz - 1,830,391,280 instructions # 2.28 insn per cycle - 0.298329557 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3366) (512y: 22) (512z: 0) + 805,096,427 cycles # 2.760 GHz + 1,830,123,182 instructions # 2.27 insn per cycle + 0.292238886 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.433633e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 4.866083e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.866083e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.516684e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.964575e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.964575e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.389558 sec +TOTAL : 0.382571 sec INFO: No Floating Point Exceptions have been reported - 729,078,705 cycles # 1.856 GHz - 1,305,984,013 instructions # 1.79 insn per cycle - 0.393475655 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1926) (512y: 26) (512z: 2437) + 732,988,918 cycles # 1.898 GHz + 1,306,469,020 instructions # 1.78 insn per cycle + 0.386957442 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1928) (512y: 24) (512z: 2435) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491576758442 Relative difference = 1.1066920862943416e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 75ffaff930..91e0f5565c 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_11:19:53 +DATE: 2024-09-18_12:20:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.751782e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.854275e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.972003e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.873875e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.862522e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.986643e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.462457 sec +TOTAL : 0.460578 sec INFO: No Floating Point Exceptions have been reported - 1,917,992,386 cycles # 2.823 GHz - 2,716,857,811 instructions # 1.42 insn per cycle - 0.737791701 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 + 1,947,256,638 cycles # 2.867 GHz + 2,707,543,109 instructions # 1.39 insn per cycle + 0.736559227 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.933049e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.480174e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.706062e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.023480e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.410580e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.620297e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.542910 sec +TOTAL : 0.541253 sec INFO: No Floating Point Exceptions have been reported - 2,260,071,894 cycles # 2.877 GHz - 3,201,078,521 instructions # 1.42 insn per cycle - 0.842582370 seconds time elapsed + 2,242,175,675 cycles # 2.877 GHz + 3,202,804,008 instructions # 1.43 insn per cycle + 0.837177537 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482577104625 Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.028418e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.050644e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.050644e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.056357e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.078870e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.078870e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.613813 sec +TOTAL : 1.570709 sec INFO: No Floating Point Exceptions have been reported - 4,647,383,877 cycles # 2.879 GHz - 13,178,063,049 instructions # 2.84 insn per cycle - 1.618051260 seconds time elapsed + 4,639,217,468 cycles # 2.947 GHz + 13,177,906,216 instructions # 2.84 insn per cycle + 1.574828509 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 681) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.864504e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 1.934483e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.934483e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.872603e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.943230e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.943230e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.898369 sec +TOTAL : 0.894053 sec INFO: No Floating Point Exceptions have been reported - 2,648,200,185 cycles # 2.937 GHz - 7,475,755,342 instructions # 2.82 insn per cycle - 0.902206814 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3153) (avx2: 0) (512y: 0) (512z: 0) + 2,648,821,910 cycles # 2.951 GHz + 7,473,297,472 instructions # 2.82 insn per cycle + 0.898331919 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3152) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.200611e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
3.408501e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.408501e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.194377e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.403567e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.403567e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.532171 sec +TOTAL : 0.532589 sec INFO: No Floating Point Exceptions have been reported - 1,476,374,652 cycles # 2.757 GHz - 3,128,702,616 instructions # 2.12 insn per cycle - 0.536024340 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3131) (512y: 0) (512z: 0) + 1,476,927,402 cycles # 2.754 GHz + 3,127,083,010 instructions # 2.12 insn per cycle + 0.536841632 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3133) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.587903e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 3.854303e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.854303e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.590247e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.853965e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.853965e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.476990 sec +TOTAL : 0.476058 sec INFO: No Floating Point Exceptions have been reported - 1,322,669,287 cycles # 2.754 GHz - 2,982,885,294 instructions # 2.26 insn per cycle - 0.480825528 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2893) (512y: 110) (512z: 0) + 1,323,043,261 cycles # 2.758 GHz + 2,981,146,980 instructions # 2.25 insn per cycle + 0.480339840 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 110) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.251912e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 2.353383e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.353383e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.287752e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.394431e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.394431e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.750102 sec +TOTAL : 0.737861 sec INFO: No Floating Point Exceptions have been reported - 1,363,693,421 cycles # 1.811 GHz - 1,991,339,845 instructions # 1.46 insn per cycle - 0.753947194 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1679) (512y: 108) (512z: 2252) + 1,365,080,339 cycles # 1.841 GHz + 1,989,993,648 instructions # 1.46 insn per cycle + 0.742169497 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1679) (512y: 108) (512z: 2251) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 40582e53fc..bc8dd367d2 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-15_11:20:07 +DATE: 2024-09-18_12:20:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.750483e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.807638e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.927006e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.866883e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.866305e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.974692e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.460124 sec +TOTAL : 0.459058 sec INFO: No Floating Point Exceptions have been reported - 1,938,834,678 cycles # 2.862 GHz - 2,712,058,421 instructions # 1.40 insn per cycle - 0.735469517 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 + 1,947,107,746 cycles # 2.878 GHz + 2,728,462,242 instructions # 1.40 insn per cycle + 0.734017841 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.925117e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.366077e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.584033e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.016948e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.370809e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.575747e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.544645 sec +TOTAL : 0.539080 sec INFO: No Floating Point Exceptions have been reported - 2,249,855,314 cycles # 2.863 GHz - 3,222,814,057 instructions # 1.43 insn per cycle - 0.844200129 seconds time elapsed + 2,244,664,779 cycles # 2.884 GHz + 3,243,168,469 instructions # 1.44 insn per cycle + 0.835323761 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482577104625 Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.054095e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.076295e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.076295e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.054078e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.076959e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.076959e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.574055 sec +TOTAL : 1.574290 sec INFO: No Floating Point Exceptions have been reported - 4,641,890,435 cycles # 2.943 GHz - 13,165,898,661 instructions # 2.84 insn per cycle - 1.578249512 seconds time elapsed + 4,646,036,617 cycles # 2.945 GHz + 13,166,645,489 instructions # 2.83 insn per cycle + 1.578550564 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.867370e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 1.936884e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.936884e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.873438e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.944671e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.944671e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.896912 sec +TOTAL : 0.893595 sec INFO: No Floating Point Exceptions have been reported - 2,636,737,245 cycles # 2.930 GHz - 7,477,755,477 instructions # 2.84 insn per cycle - 0.900719288 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3142) (avx2: 0) (512y: 0) (512z: 0) + 2,639,674,089 cycles # 2.942 GHz + 7,474,954,292 instructions # 2.83 insn per cycle + 0.897961439 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.202775e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
3.410191e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.410191e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.194979e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.406933e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.406933e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.531412 sec +TOTAL : 0.532401 sec INFO: No Floating Point Exceptions have been reported - 1,468,072,782 cycles # 2.747 GHz - 3,129,202,339 instructions # 2.13 insn per cycle - 0.535248151 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3109) (512y: 0) (512z: 0) + 1,471,043,256 cycles # 2.744 GHz + 3,127,494,333 instructions # 2.13 insn per cycle + 0.536715670 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3111) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.576512e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 3.841608e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.841608e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.604804e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.871054e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.871054e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.477966 sec +TOTAL : 0.474052 sec INFO: No Floating Point Exceptions have been reported - 1,324,577,804 cycles # 2.753 GHz - 2,983,698,636 instructions # 2.25 insn per cycle - 0.481692847 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2869) (512y: 110) (512z: 0) + 1,321,700,799 cycles # 2.767 GHz + 2,981,907,836 instructions # 2.26 insn per cycle + 0.478334854 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2871) (512y: 110) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.229034e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 2.331255e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.331255e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.246259e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.348752e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.348752e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.757523 sec +TOTAL : 0.751218 sec INFO: No Floating Point Exceptions have been reported - 1,366,953,688 cycles # 1.797 GHz - 1,991,556,146 instructions # 1.46 insn per cycle - 0.761326972 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1655) (512y: 108) (512z: 2252) + 1,373,432,632 cycles # 1.819 GHz + 1,989,927,175 instructions # 1.45 insn per cycle + 0.755614240 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1655) (512y: 108) (512z: 2251) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index a10430f205..6ae2d07b8c 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-15_12:19:52 +DATE: 2024-09-18_13:35:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.222962e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.849418e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.427313e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.333836e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.844165e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.406248e+07 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.531972 sec +TOTAL : 0.534496 sec INFO: No Floating Point Exceptions have been reported - 2,207,295,929 cycles # 2.875 GHz - 3,148,652,719 instructions # 1.43 insn per cycle - 0.824191400 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,180,885,043 cycles # 2.827 GHz + 3,135,152,783 instructions # 1.44 insn per cycle + 0.828766444 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134710926110280 Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.605446e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.642912e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.642912e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.605100e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.641462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.641462e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.682964 sec +TOTAL : 6.643809 sec INFO: No Floating Point Exceptions have been reported - 19,598,347,374 cycles # 2.930 GHz - 52,065,080,941 instructions # 2.66 insn per cycle - 6.694844262 seconds time elapsed + 19,303,523,142 cycles # 2.904 GHz + 51,922,542,271 instructions # 2.69 insn per cycle + 6.649309354 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.916629e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.051442e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.051442e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.864838e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.993187e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.993187e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.747547 sec +TOTAL : 3.771971 sec INFO: No Floating Point Exceptions have been reported - 11,065,354,139 cycles # 2.943 GHz - 30,912,254,749 instructions # 2.79 insn per cycle - 3.760361851 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2914) (avx2: 0) (512y: 0) (512z: 0) + 10,899,823,947 cycles # 2.886 GHz + 30,797,169,430 instructions # 2.83 insn per cycle + 3.777469678 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2915) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.668387e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 5.008748e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.008748e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.618832e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.953390e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.953390e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.399916 sec +TOTAL : 2.382134 sec INFO: No Floating Point Exceptions have been reported - 6,627,221,489 cycles # 2.749 GHz - 13,792,796,598 instructions # 2.08 insn per cycle - 2.412653295 seconds time elapsed + 6,463,553,394 cycles # 2.708 GHz + 13,666,010,364 instructions # 2.11 insn per cycle + 2.387555326 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2941) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.129922e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 5.540308e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.540308e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.007992e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.398956e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.398956e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.198225 sec +TOTAL : 2.205089 sec INFO: No Floating Point Exceptions have been reported - 6,104,431,058 cycles # 2.762 GHz - 13,134,794,290 instructions # 2.15 insn per cycle - 2.210920696 seconds time elapsed + 5,947,846,964 cycles # 2.692 GHz + 13,006,222,979 instructions # 2.19 insn per cycle + 2.210472243 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2667) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.449384e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 3.629220e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.629220e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.325208e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.493799e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.493799e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.193674 sec +TOTAL : 3.265150 sec INFO: No Floating Point Exceptions have been reported - 5,993,463,965 cycles # 1.870 GHz - 8,712,960,993 instructions # 1.45 insn per cycle - 3.206184057 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1506) (512y: 128) (512z: 1943) + 5,846,999,066 cycles # 1.789 GHz + 8,588,678,582 instructions # 1.47 insn per cycle + 3.271052301 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1506) (512y: 128) (512z: 1946) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index 01ceafd1da..a09eaeb7bd 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-15_12:20:19 +DATE: 2024-09-18_13:35:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.181438e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.797209e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.367517e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.270085e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.841839e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.403601e+07 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.531302 sec +TOTAL : 0.533386 sec INFO: No Floating Point Exceptions have been reported - 2,216,417,295 cycles # 2.883 GHz - 3,137,968,070 instructions # 1.42 insn per cycle - 0.825226040 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,214,034,172 cycles # 2.879 GHz + 3,142,399,923 instructions # 1.42 insn per cycle + 0.826419344 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134710926110280 Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.706608e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.748828e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.748828e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.706120e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.746757e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.746757e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.299466 sec +TOTAL : 6.255979 sec INFO: No Floating Point Exceptions have been reported - 18,540,883,021 cycles # 2.938 GHz - 50,178,474,604 instructions # 2.71 insn per cycle - 6.311951743 seconds time elapsed + 18,389,967,178 cycles # 2.937 GHz + 50,052,771,539 instructions # 2.72 insn per cycle + 6.261520945 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 626) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.062664e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.211274e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.211274e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.086242e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.232589e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.232589e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.576786 sec +TOTAL : 3.507867 sec INFO: No Floating Point Exceptions have been reported - 10,549,321,378 cycles # 2.940 GHz - 29,289,408,214 instructions # 2.78 insn per cycle - 3.589213709 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2732) (avx2: 0) (512y: 0) (512z: 0) + 10,373,977,217 cycles # 2.954 GHz + 29,174,589,795 instructions # 2.81 insn per cycle + 3.513510894 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2733) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.340015e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 4.632096e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.632096e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.355224e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.644479e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.644479e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.568858 sec +TOTAL : 2.518598 sec INFO: No Floating Point Exceptions have been reported - 7,118,801,409 cycles # 2.759 GHz - 15,276,261,936 instructions # 2.15 insn per cycle - 2.581007821 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3021) (512y: 0) (512z: 0) + 6,982,239,473 cycles # 2.767 GHz + 15,149,066,703 instructions # 2.17 insn per cycle + 2.524208385 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3020) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.507726e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 4.822175e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.822175e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.542431e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.862341e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.862341e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.479110 sec +TOTAL : 2.419194 sec INFO: No Floating Point Exceptions have been reported - 6,890,334,799 cycles # 2.767 GHz - 14,747,969,860 instructions # 2.14 insn per cycle - 2.491499387 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2617) (512y: 302) (512z: 0) + 6,707,959,962 cycles # 2.767 GHz + 14,619,001,595 instructions # 2.18 insn per cycle + 2.424680502 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2621) (512y: 302) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.315391e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 3.482899e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.482899e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.289276e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.449465e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.449465e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.316209 sec +TOTAL : 3.297038 sec INFO: No Floating Point Exceptions have been reported - 6,207,380,257 cycles # 1.865 GHz - 10,464,609,822 instructions # 1.69 insn per cycle - 3.328585456 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 214) (512z: 2130) + 6,083,017,370 cycles # 1.843 GHz + 10,339,705,857 instructions # 1.70 insn per cycle + 3.302657897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 214) (512z: 2129) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 2ef1c54aa0..50a3de8673 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-15_12:20:46 +DATE: 2024-09-18_13:36:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.552559e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.511007e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.603394e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.744477e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.525834e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.617286e+08 ) sec^-1 MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.491259 sec +TOTAL : 0.492808 sec INFO: No Floating Point Exceptions have been reported - 2,068,896,846 cycles # 2.881 GHz - 2,979,901,367 instructions # 1.44 insn per cycle - 0.776426531 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,047,170,412 cycles # 2.828 GHz + 2,929,586,090 instructions # 1.43 insn per cycle + 0.781904908 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 131 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313490e+00 Avg ME (F77/GPU) = 4.3136695491848513 Relative difference = 4.162503792787837e-05 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.683914e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.725672e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.725672e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.678996e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.720654e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.720654e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 6.333148 sec +TOTAL : 6.333448 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,682,476,989 cycles # 2.947 GHz - 51,267,470,348 instructions # 2.74 insn per cycle - 6.341547157 seconds time elapsed + 18,607,993,167 cycles # 2.936 GHz + 51,216,519,035 instructions # 2.75 insn per cycle + 6.339213853 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 625) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 @@ -113,24 +113,24 @@ Avg ME (F77/C++) = 4.3135738277342170 Relative difference = 3.9935743068669333e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.015012e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.280109e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.280109e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.022786e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.287209e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.287209e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.718050 sec +TOTAL : 2.694054 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 8,007,664,837 cycles # 2.940 GHz - 19,370,996,217 instructions # 2.42 insn per cycle - 2.726376718 seconds time elapsed + 7,934,623,764 cycles # 2.940 GHz + 19,316,417,604 instructions # 2.43 insn per cycle + 2.699461082 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3542) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313572e+00 @@ -147,24 +147,24 @@ Avg ME (F77/C++) = 4.3135722697479650 Relative difference = 6.253470796314402e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.789023e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.799247e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.799247e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.880495e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.877642e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.877642e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.452394 sec +TOTAL : 1.418247 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,027,415,196 cycles # 2.759 GHz - 8,886,566,152 instructions # 2.21 insn per cycle - 1.460503609 seconds time elapsed + 3,951,478,174 cycles # 2.777 GHz + 8,833,281,557 instructions # 2.24 insn per cycle + 1.423672827 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3715) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,31 +172,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313565e+00 Avg ME (F77/C++) = 4.3135645242873579 Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.322323e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 9.475598e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.475598e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.368251e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.499225e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.499225e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.366660 sec +TOTAL : 1.339980 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,807,036,059 cycles # 2.770 GHz - 8,489,981,547 instructions # 2.23 insn per cycle - 1.374788749 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3543) (512y: 20) (512z: 0) + 3,727,978,138 cycles # 2.773 GHz + 8,431,050,226 instructions # 2.26 insn per cycle + 1.345489073 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3541) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -204,31 +204,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313565e+00 Avg ME (F77/C++) = 4.3135645242873579 Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.974329e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) 
= ( 6.534282e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.534282e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.964882e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.513882e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.513882e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.862736 sec +TOTAL : 1.846291 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,570,392,990 cycles # 1.910 GHz - 6,298,404,091 instructions # 1.76 insn per cycle - 1.870756064 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2323) (512y: 24) (512z: 2290) + 3,506,879,162 cycles # 1.895 GHz + 6,243,949,016 instructions # 1.78 insn per cycle + 1.851728712 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2325) (512y: 22) (512z: 2290) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -236,8 +236,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313564e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index 479ebdb204..2b5536237c 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 
'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-15_12:21:08 +DATE: 2024-09-18_13:36:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.776065e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.594605e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.702542e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.958341e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.585012e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.687226e+08 ) sec^-1 MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.491621 sec +TOTAL : 0.488902 sec INFO: No Floating Point Exceptions have been reported - 2,069,264,305 cycles # 2.877 GHz - 2,928,235,838 instructions # 1.42 insn per cycle - 0.775773692 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,054,269,206 cycles # 2.862 GHz + 2,934,748,812 instructions # 1.43 insn per cycle + 0.774105073 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313490e+00 Avg ME (F77/GPU) = 4.3136695491848513 Relative difference = 4.162503792787837e-05 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.731608e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.775696e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.775696e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.738704e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.782579e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.782579e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 6.161667 sec +TOTAL : 6.118201 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,113,353,892 cycles # 2.937 GHz - 49,656,566,510 instructions # 2.74 insn per cycle - 6.170127822 seconds time elapsed + 18,018,613,315 cycles # 2.943 GHz + 49,602,263,054 instructions # 2.75 insn per cycle + 6.123752242 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 613) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 @@ -113,24 +113,24 @@ Avg ME (F77/C++) = 4.3135738277342170 Relative difference = 3.9935743068669333e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.528214e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.868162e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.868162e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.513439e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.846420e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.846420e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.421156 sec +TOTAL : 2.410664 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,159,836,216 cycles # 2.949 GHz - 18,538,672,579 instructions # 2.59 insn per cycle - 2.429136947 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3234) (avx2: 0) (512y: 0) (512z: 0) + 7,118,641,278 cycles # 2.947 GHz + 18,533,207,759 instructions # 2.60 insn per cycle + 2.416130283 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3252) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313572e+00 @@ -147,24 +147,24 @@ Avg ME (F77/C++) = 4.3135722697479650 Relative difference = 6.253470796314402e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.353305e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.808520e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.808520e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.337179e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.778552e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.778552e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 2.063917 sec +TOTAL : 2.052609 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,718,285,058 cycles # 2.760 GHz - 10,903,070,951 instructions # 1.91 insn per cycle - 2.072527320 seconds time elapsed + 5,666,208,381 cycles # 2.754 GHz + 10,850,402,094 instructions # 1.91 insn per cycle + 2.057862471 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4274) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,8 +172,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -181,24 +181,24 @@ Avg ME (F77/C++) = 4.3135645242873579 Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.452070e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.924355e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.924355e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.416639e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.866517e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.866517e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 2.029021 sec +TOTAL : 2.022314 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,634,694,028 cycles # 2.767 GHz - 10,598,235,094 instructions # 1.88 insn per cycle - 2.037144953 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4135) (512y: 12) (512z: 0) + 5,555,880,143 cycles # 2.741 GHz + 10,551,186,314 instructions # 1.90 insn per cycle + 2.027927255 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4138) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -206,8 +206,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -215,24 +215,24 @@ Avg ME (F77/C++) = 4.3135645242873579 Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.351507e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.637189e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.637189e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.322863e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.603781e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.603781e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 2.516038 sec +TOTAL : 2.514102 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,700,188,022 cycles # 1.863 GHz - 8,712,811,590 instructions # 1.85 insn per cycle - 2.524039667 seconds time elapsed + 4,668,008,181 cycles # 1.854 GHz + 8,659,615,849 instructions # 1.86 insn per cycle + 2.519706497 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 0) (512z: 2885) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -240,8 +240,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313564e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 7f1052231e..3c9a7750d0 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 
'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-15_12:21:31 +DATE: 2024-09-18_13:37:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.233444e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.828530e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.380985e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.259037e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.833623e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.391198e+07 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.531166 sec +TOTAL : 0.533343 sec INFO: No Floating Point Exceptions have been reported - 2,204,089,924 cycles # 2.871 GHz - 3,157,003,197 instructions # 1.43 insn per cycle - 0.824717149 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,205,791,107 cycles # 2.867 GHz + 3,166,074,888 instructions # 1.44 insn per cycle + 0.826367468 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.513438e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.546541e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.546541e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.526469e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.558963e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.558963e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 7.083035 sec +TOTAL : 6.979629 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 20,836,157,287 cycles # 2.937 GHz - 52,059,859,689 instructions # 2.50 insn per cycle - 7.095325403 seconds time elapsed + 20,509,216,850 cycles # 2.937 GHz + 51,923,869,243 instructions # 2.53 insn per cycle + 6.985125737 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -113,24 +113,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.708543e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.825114e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.825114e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.719239e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.833565e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.833565e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 4.025851 sec +TOTAL : 3.966787 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 11,676,241,949 cycles # 2.892 GHz - 30,719,909,890 instructions # 2.63 insn per cycle - 4.038601753 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2971) (avx2: 0) (512y: 0) (512z: 0) + 11,507,632,981 cycles # 2.897 GHz + 30,592,941,946 instructions # 2.66 insn per cycle + 3.972658763 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2972) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -147,24 +147,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.506319e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.824086e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.824086e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.525746e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.838241e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.838241e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.479724 sec +TOTAL : 2.427006 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,856,020,570 cycles # 2.752 GHz - 13,733,686,621 instructions # 2.00 insn per cycle - 2.492002268 seconds time elapsed + 6,694,021,096 cycles # 2.753 GHz + 13,606,483,540 instructions # 2.03 insn per cycle + 2.432521216 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3118) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,8 +172,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -181,24 +181,24 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.929662e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.305854e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.305854e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.956630e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.333349e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.333349e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.278940 sec +TOTAL : 2.225975 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,324,440,516 cycles # 2.761 GHz - 13,099,663,654 instructions # 2.07 insn per cycle - 2.291244442 seconds time elapsed + 6,165,401,380 cycles # 2.764 GHz + 12,974,481,027 instructions # 2.10 insn per cycle + 2.231658259 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2851) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -206,8 +206,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -215,24 +215,24 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.121063e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.267193e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.267193e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.095455e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.237519e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.237519e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.513123 sec +TOTAL : 3.497825 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,549,229,378 cycles # 1.858 GHz - 8,826,958,587 instructions # 1.35 insn per cycle - 3.525479379 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1792) (512y: 130) (512z: 2013) + 6,439,450,147 cycles # 1.839 GHz + 8,701,510,932 instructions # 1.35 insn per cycle + 3.503267717 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1792) (512y: 130) (512z: 2014) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -240,8 +240,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index b5ff528c40..008d0a9d35 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 
'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-15_12:21:59 +DATE: 2024-09-18_13:37:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.263927e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.696044e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.276288e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.252482e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.819370e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.388849e+07 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.532325 sec +TOTAL : 0.535514 sec INFO: No Floating Point Exceptions have been reported - 2,205,625,214 cycles # 2.868 GHz - 3,174,502,414 instructions # 1.44 insn per cycle - 0.825987629 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,214,143,603 cycles # 2.876 GHz + 3,159,539,235 instructions # 1.43 insn per cycle + 0.828878265 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.606903e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.644535e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.644535e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.608272e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.644385e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.644385e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.677680 sec +TOTAL : 6.629223 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 19,666,494,079 cycles # 2.940 GHz - 50,081,060,677 instructions # 2.55 insn per cycle - 6.689882991 seconds time elapsed + 19,498,919,287 cycles # 2.939 GHz + 49,953,158,127 instructions # 2.56 insn per cycle + 6.634747708 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 599) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -113,24 +113,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.871340e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.003903e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.003903e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.887478e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.016037e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.016037e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.840726 sec +TOTAL : 3.741933 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 11,259,304,877 cycles # 2.923 GHz - 29,230,934,183 instructions # 2.60 insn per cycle - 3.852980170 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2807) (avx2: 0) (512y: 0) (512z: 0) + 11,045,759,193 cycles # 2.948 GHz + 29,138,468,069 instructions # 2.64 insn per cycle + 3.747566884 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2815) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -147,24 +147,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.726077e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.943109e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.943109e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.735821e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.946793e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.946793e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.970419 sec +TOTAL : 2.917806 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 8,229,432,856 cycles # 2.759 GHz - 15,297,097,015 instructions # 1.86 insn per cycle - 2.983820409 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3202) (512y: 0) (512z: 0) + 8,064,126,572 cycles # 2.759 GHz + 15,188,166,070 instructions # 1.88 insn per cycle + 2.923408860 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3203) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,8 +172,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -181,24 +181,24 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.908170e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.147639e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.147639e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.934941e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.167424e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.167424e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.839910 sec +TOTAL : 2.773801 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,866,112,540 cycles # 2.759 GHz - 14,608,431,526 instructions # 1.86 insn per cycle - 2.852893659 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2774) (512y: 304) (512z: 0) + 7,685,843,393 cycles # 2.766 GHz + 14,482,526,269 instructions # 1.88 insn per cycle + 2.779397074 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2775) (512y: 304) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -206,8 +206,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -215,24 +215,24 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.030076e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.169205e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.169205e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.028557e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.163339e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.163339e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.616163 sec +TOTAL : 3.571755 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,699,969,895 cycles # 1.847 GHz - 10,018,865,936 instructions # 1.50 insn per cycle - 3.629335211 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1565) (512y: 216) (512z: 2217) + 6,530,752,454 cycles # 1.826 GHz + 9,894,967,129 instructions # 1.52 insn per cycle + 3.577461945 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1565) (512y: 216) (512z: 2216) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -240,8 +240,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 7707f676a6..052ae7ee83 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h 
make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-15_12:18:46 +DATE: 2024-09-18_13:34:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.767516e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.784818e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.787795e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.764082e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.781890e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.785142e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.470421 sec +TOTAL : 0.472813 sec INFO: No Floating Point Exceptions have been reported - 1,978,272,924 cycles # 2.864 GHz - 2,912,164,766 instructions # 1.47 insn per cycle - 0.749211691 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 + 1,988,958,737 cycles # 2.864 GHz + 2,937,434,860 instructions # 1.48 insn per cycle + 
0.752740146 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.005244e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.117313e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.126114e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.003017e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.119483e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.127951e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.483026 sec +TOTAL : 0.489631 sec INFO: No Floating Point Exceptions have been reported - 2,024,260,948 cycles # 2.878 GHz - 3,029,497,927 instructions # 1.50 insn per cycle - 0.762830166 seconds time elapsed + 2,045,084,255 cycles # 2.869 GHz + 3,023,069,261 instructions # 1.48 insn per cycle + 0.771335484 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 Avg ME (F77/GPU) = 8.1274562860176604E-006 Relative difference = 3.3392753366481633e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.405701e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.409074e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.409074e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.395968e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.399199e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.399199e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.157165 sec +TOTAL : 0.157638 sec INFO: No Floating Point Exceptions have been reported - 467,074,127 cycles # 2.919 GHz - 1,389,682,298 instructions # 2.98 insn per cycle - 0.160520641 seconds time elapsed + 469,190,775 cycles # 2.915 GHz + 1,389,792,831 instructions # 2.96 insn per cycle + 0.161480291 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3908) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.459230e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.470849e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.470849e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.497864e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.511372e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.511372e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.083614 sec +TOTAL : 0.083349 sec INFO: No Floating Point Exceptions have been reported - 239,038,405 cycles # 2.765 GHz - 692,921,675 instructions # 2.90 insn per cycle - 0.087016440 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9483) (avx2: 0) (512y: 0) (512z: 0) + 241,222,273 cycles # 2.780 GHz + 693,002,253 instructions # 2.87 insn per cycle + 0.087370180 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9482) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.419984e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.425694e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.425694e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.431164e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.437397e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.437397e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.039288 sec +TOTAL : 0.038955 sec INFO: No Floating Point Exceptions have been reported - 113,366,397 cycles # 2.696 GHz - 257,996,166 instructions # 2.28 insn per cycle - 0.042698199 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8503) (512y: 0) (512z: 0) + 115,308,474 cycles # 2.709 GHz + 257,920,071 instructions # 2.24 insn per cycle + 0.043236547 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8501) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.624961e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.632288e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.632288e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.580017e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.587312e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.587312e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.034517 sec +TOTAL : 0.035473 sec INFO: No Floating Point Exceptions have been reported - 101,263,068 cycles # 2.711 GHz - 239,969,377 instructions # 2.37 insn per cycle - 0.037861089 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8140) (512y: 150) (512z: 0) + 102,969,893 cycles # 2.655 GHz + 240,051,517 instructions # 2.33 insn per cycle + 0.039391596 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8143) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.199166e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.204857e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.204857e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.194413e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.199659e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.199659e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.046301 sec +TOTAL : 0.046450 sec INFO: No Floating Point Exceptions have been reported - 89,031,390 cycles # 1.806 GHz - 134,346,666 instructions # 1.51 insn per cycle - 0.049794003 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1943) (512y: 126) (512z: 7090) + 90,344,224 cycles # 1.811 GHz + 134,320,028 instructions # 1.49 insn per cycle + 0.050486009 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1943) (512y: 126) (512z: 7086) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index ca3a407fd8..a192f75604 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-15_12:18:57 +DATE: 2024-09-18_13:34:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.802842e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.821481e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.824642e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.801517e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.819462e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.822714e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.470683 sec +TOTAL : 0.476112 sec INFO: No Floating Point Exceptions have been reported - 1,990,204,132 cycles # 2.870 GHz - 2,908,985,105 instructions # 1.46 insn per cycle - 0.750697991 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 + 2,002,117,199 cycles # 2.852 GHz + 2,866,160,766 instructions # 1.43 insn per cycle + 
0.760468280 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.083966e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.205394e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.213656e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.078707e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.191865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.200143e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.487582 sec +TOTAL : 0.489613 sec INFO: No Floating Point Exceptions have been reported - 2,025,056,560 cycles # 2.852 GHz - 2,989,421,142 instructions # 1.48 insn per cycle - 0.769117174 seconds time elapsed + 2,042,241,108 cycles # 2.868 GHz + 2,998,598,647 instructions # 1.47 insn per cycle + 0.772399138 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 Avg ME (F77/GPU) = 8.1274562860176604E-006 Relative difference = 3.3392753366481633e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.394566e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.397741e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.397741e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.407520e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.410948e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.410948e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.157033 sec +TOTAL : 0.156563 sec INFO: No Floating Point Exceptions have been reported - 465,720,728 cycles # 2.911 GHz - 1,385,003,144 instructions # 2.97 insn per cycle - 0.160593741 seconds time elapsed + 466,584,758 cycles # 2.920 GHz + 1,385,250,664 instructions # 2.97 insn per cycle + 0.160376464 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3796) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.474186e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.485931e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.485931e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.449696e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.462962e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.462962e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.082867 sec +TOTAL : 0.083015 sec INFO: No Floating Point Exceptions have been reported - 237,575,401 cycles # 2.770 GHz - 689,116,420 instructions # 2.90 insn per cycle - 0.086305788 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9528) (avx2: 0) (512y: 0) (512z: 0) + 239,636,465 cycles # 2.770 GHz + 689,080,119 instructions # 2.88 insn per cycle + 0.087201828 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9525) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.436754e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.442531e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.442531e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.414254e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.419861e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.419861e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.038039 sec +TOTAL : 0.038646 sec INFO: No Floating Point Exceptions have been reported - 110,520,646 cycles # 2.700 GHz - 253,448,082 instructions # 2.29 insn per cycle - 0.041474271 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8458) (512y: 0) (512z: 0) + 111,994,100 cycles # 2.669 GHz + 253,518,298 instructions # 2.26 insn per cycle + 0.042520952 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8457) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.611056e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.618327e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.618327e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.642367e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.650155e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.650155e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.034052 sec +TOTAL : 0.033489 sec INFO: No Floating Point Exceptions have been reported - 98,863,837 cycles # 2.687 GHz - 235,605,174 instructions # 2.38 insn per cycle - 0.037353270 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8098) (512y: 150) (512z: 0) + 100,655,003 cycles # 2.733 GHz + 235,667,417 instructions # 2.34 insn per cycle + 0.037423166 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8101) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.176506e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.181658e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.181658e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.198873e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.203973e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.203973e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.046442 sec +TOTAL : 0.045540 sec INFO: No Floating Point Exceptions have been reported - 86,647,290 cycles # 1.764 GHz - 129,720,267 instructions # 1.50 insn per cycle - 0.049837932 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1899) (512y: 126) (512z: 7094) + 88,110,981 cycles # 1.799 GHz + 129,713,745 instructions # 1.47 insn per cycle + 0.049588057 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1899) (512y: 126) (512z: 7084) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 0df257cc6a..0a43242226 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-15_12:19:09 +DATE: 2024-09-18_13:34:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.204232e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.214249e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.216451e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.214942e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.224129e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.226295e+04 ) sec^-1 MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 -TOTAL : 0.474860 sec +TOTAL : 0.475163 sec INFO: No Floating Point Exceptions have been reported - 1,958,003,333 cycles # 2.836 GHz - 2,859,472,548 instructions # 1.46 insn per cycle - 0.747968607 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 + 1,995,760,495 cycles # 2.876 GHz + 2,898,607,116 instructions # 1.45 insn per cycle + 0.751350588 
seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.933159e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.016706e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.024424e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.954269e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.031370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.039107e+05 ) sec^-1 MeanMatrixElemValue = ( 8.020494e-03 +- 4.025605e-03 ) GeV^-4 -TOTAL : 0.477251 sec +TOTAL : 0.476219 sec INFO: No Floating Point Exceptions have been reported - 1,992,584,596 cycles # 2.867 GHz - 2,884,692,368 instructions # 1.45 insn per cycle - 0.751767434 seconds time elapsed + 1,999,149,645 cycles # 2.878 GHz + 2,913,422,324 instructions # 1.46 insn per cycle + 0.751593441 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127250e-06 Avg ME (F77/GPU) = 8.1272869669930272E-006 Relative difference = 4.548524165778887e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.431583e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.434922e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.434922e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.411294e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.414706e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.414706e+03 ) sec^-1 MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.155945 sec +TOTAL : 0.156868 sec INFO: No Floating Point Exceptions have been reported - 462,855,819 cycles # 2.915 GHz - 1,381,844,785 instructions # 2.99 insn per cycle - 0.159290331 seconds time elapsed + 464,525,374 cycles # 2.900 GHz + 1,382,008,460 instructions # 2.98 insn per cycle + 0.160803882 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127811e-06 Avg ME (F77/C++) = 8.1278105271212486E-006 Relative difference = 5.8180333155894157e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.210882e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.215211e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.215211e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.203598e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.208165e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.208165e+04 ) sec^-1 MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.045512 sec +TOTAL : 0.045866 sec INFO: No Floating Point Exceptions have been reported - 131,360,157 cycles # 2.718 GHz - 372,013,509 instructions # 2.83 insn per cycle - 0.048801319 seconds time elapsed + 133,138,155 cycles # 2.706 GHz + 372,169,369 instructions # 2.80 insn per cycle + 0.049817482 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:10141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127809e-06 Avg ME (F77/C++) = 8.1278090510674588E-006 Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.769306e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.791871e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.791871e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.784499e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.809977e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.809977e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.021120 sec +TOTAL : 0.020924 sec INFO: No Floating Point Exceptions have been reported - 64,157,831 cycles # 2.680 GHz - 142,829,765 instructions # 2.23 insn per cycle - 0.024479209 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9251) (512y: 0) (512z: 0) + 65,424,959 cycles # 2.700 GHz + 142,812,066 instructions # 2.18 insn per cycle + 0.024819725 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9241) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.078002e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.105354e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.105354e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.962557e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.993097e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.993097e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.019156 sec +TOTAL : 0.019867 sec INFO: No Floating Point Exceptions have been reported - 59,143,033 cycles # 2.685 GHz - 132,774,537 instructions # 2.24 insn per cycle - 0.022562262 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8963) (512y: 28) (512z: 0) + 60,581,334 cycles # 2.611 GHz + 132,865,474 instructions # 2.19 insn per cycle + 0.023738141 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8959) (512y: 28) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.363857e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.386270e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.386270e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.316896e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.339579e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.339579e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 -TOTAL : 0.024531 sec +TOTAL : 0.024992 sec INFO: No Floating Point Exceptions have been reported - 51,349,038 cycles # 1.867 GHz - 79,557,658 instructions # 1.55 insn per cycle - 0.028087213 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2834) (512y: 32) (512z: 7442) + 52,575,011 cycles # 1.850 GHz + 79,563,519 instructions # 1.51 insn per cycle + 0.029028726 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2836) (512y: 30) (512z: 7437) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275369863475849E-006 Relative difference = 1.6797726498700304e-09 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index 1f92901611..81fec428b9 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-15_12:19:19 +DATE: 2024-09-18_13:34:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.234747e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.244150e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.246143e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.237744e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.247254e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.249233e+04 ) sec^-1 MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 -TOTAL : 0.473072 sec +TOTAL : 0.475021 sec INFO: No Floating Point Exceptions have been reported - 1,983,051,703 cycles # 2.868 GHz - 2,920,120,611 instructions # 1.47 insn per cycle - 0.748230768 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 + 1,993,743,022 cycles # 2.872 GHz + 2,918,324,117 instructions # 1.46 insn per cycle + 0.750958800 
seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.099236e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.191571e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.199199e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.067375e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.148140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.156186e+05 ) sec^-1 MeanMatrixElemValue = ( 8.020496e-03 +- 4.025606e-03 ) GeV^-4 -TOTAL : 0.474597 sec +TOTAL : 0.476461 sec INFO: No Floating Point Exceptions have been reported - 1,988,967,454 cycles # 2.876 GHz - 2,944,964,203 instructions # 1.48 insn per cycle - 0.748107743 seconds time elapsed + 1,993,725,610 cycles # 2.868 GHz + 2,900,779,066 instructions # 1.45 insn per cycle + 0.752726088 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127250e-06 Avg ME (F77/GPU) = 8.1272866419447706E-006 Relative difference = 4.508529302013153e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.448809e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.452114e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.452114e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.438799e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.442175e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.442175e+03 ) sec^-1 MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.154406 sec +TOTAL : 0.155006 sec INFO: No Floating Point Exceptions have been reported - 460,841,033 cycles # 2.931 GHz - 1,376,637,796 instructions # 2.99 insn per cycle - 0.157690889 seconds time elapsed + 462,147,018 cycles # 2.920 GHz + 1,376,798,562 instructions # 2.98 insn per cycle + 0.158894971 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2930) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127811e-06 Avg ME (F77/C++) = 8.1278105271212486E-006 Relative difference = 5.8180333155894157e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.217964e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.222354e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.222354e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.224501e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.229267e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.229267e+04 ) sec^-1 MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.044518 sec +TOTAL : 0.044280 sec INFO: No Floating Point Exceptions have been reported - 129,447,390 cycles # 2.729 GHz - 367,192,934 instructions # 2.84 insn per cycle - 0.047990838 seconds time elapsed + 130,643,774 cycles # 2.744 GHz + 367,253,267 instructions # 2.81 insn per cycle + 0.048214582 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:10124) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127809e-06 Avg ME (F77/C++) = 8.1278090510674588E-006 Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.769546e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.792490e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.792490e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.785213e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.809806e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.809806e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.020349 sec +TOTAL : 0.020207 sec INFO: No Floating Point Exceptions have been reported - 62,145,033 cycles # 2.684 GHz - 138,048,264 instructions # 2.22 insn per cycle - 0.023682982 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9205) (512y: 0) (512z: 0) + 63,247,605 cycles # 2.692 GHz + 138,006,301 instructions # 2.18 insn per cycle + 0.024065097 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9196) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.058079e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.086570e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.086570e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.053192e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.081685e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.081685e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.018477 sec +TOTAL : 0.018541 sec INFO: No Floating Point Exceptions have been reported - 56,677,502 cycles # 2.660 GHz - 127,963,925 instructions # 2.26 insn per cycle - 0.021825959 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8919) (512y: 28) (512z: 0) + 58,249,945 cycles # 2.668 GHz + 127,981,629 instructions # 2.20 insn per cycle + 0.022408862 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8910) (512y: 28) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.337142e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.358958e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.358958e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.336383e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.358299e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.358299e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 -TOTAL : 0.023953 sec +TOTAL : 0.024035 sec INFO: No Floating Point Exceptions have been reported - 48,824,483 cycles # 1.820 GHz - 74,785,723 instructions # 1.53 insn per cycle - 0.027430916 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2789) (512y: 32) (512z: 7444) + 50,478,559 cycles # 1.838 GHz + 74,763,022 instructions # 1.48 insn per cycle + 0.028059996 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2791) (512y: 30) (512z: 7439) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275369863475849E-006 Relative difference = 1.6797726498700304e-09 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index c9ae973486..59d9b0aed3 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-15_12:19:30 +DATE: 2024-09-18_13:35:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.749294e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.767595e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.770609e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.754823e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.776415e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.779447e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.467392 sec +TOTAL : 0.471377 sec INFO: No Floating Point Exceptions have been reported - 1,983,595,553 cycles # 2.874 GHz - 2,922,486,219 instructions # 1.47 insn per cycle - 0.746529670 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 + 1,997,323,985 cycles # 2.874 GHz + 2,899,694,458 instructions # 1.45 insn per cycle + 0.752307454 
seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.927630e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.040034e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.047831e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.948061e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.061017e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.069565e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.485867 sec +TOTAL : 0.489196 sec INFO: No Floating Point Exceptions have been reported - 2,031,462,606 cycles # 2.875 GHz - 3,037,983,552 instructions # 1.50 insn per cycle - 0.765937206 seconds time elapsed + 2,036,665,309 cycles # 2.870 GHz + 3,021,584,007 instructions # 1.48 insn per cycle + 0.771275990 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 Avg ME (F77/GPU) = 8.1274562879405200E-006 Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.382949e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.386200e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.386200e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.346871e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.350343e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.350343e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.158173 sec +TOTAL : 0.160031 sec INFO: No Floating Point Exceptions have been reported - 471,387,733 cycles # 2.929 GHz - 1,398,281,899 instructions # 2.97 insn per cycle - 0.161473463 seconds time elapsed + 472,933,421 cycles # 2.893 GHz + 1,398,381,136 instructions # 2.96 insn per cycle + 0.164085482 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.673807e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.686050e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.686050e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.641661e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.653702e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.653702e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.080997 sec +TOTAL : 0.081430 sec INFO: No Floating Point Exceptions have been reported - 235,160,008 cycles # 2.808 GHz - 688,033,850 instructions # 2.93 insn per cycle - 0.084339129 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9328) (avx2: 0) (512y: 0) (512z: 0) + 237,272,954 cycles # 2.797 GHz + 688,192,491 instructions # 2.90 insn per cycle + 0.085340914 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9334) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.415459e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.422136e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.422136e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.416781e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.422580e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.422580e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.039362 sec +TOTAL : 0.039309 sec INFO: No Floating Point Exceptions have been reported - 112,339,380 cycles # 2.665 GHz - 253,052,093 instructions # 2.25 insn per cycle - 0.042695307 seconds time elapsed + 114,214,565 cycles # 2.672 GHz + 253,122,283 instructions # 2.22 insn per cycle + 0.043386095 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8363) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.648852e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.656658e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.656658e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.596060e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.604256e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.604256e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.034079 sec +TOTAL : 0.035081 sec INFO: No Floating Point Exceptions have been reported - 100,217,114 cycles # 2.715 GHz - 233,607,212 instructions # 2.33 insn per cycle - 0.037476380 seconds time elapsed + 101,856,642 cycles # 2.646 GHz + 233,656,157 instructions # 2.29 insn per cycle + 0.039147600 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7501) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.192314e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.197366e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.197366e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.146549e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.151691e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.151691e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.046499 sec +TOTAL : 0.048254 sec INFO: No Floating Point Exceptions have been reported - 89,493,670 cycles # 1.812 GHz - 133,128,515 instructions # 1.49 insn per cycle - 0.049962595 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2084) (512y: 122) (512z: 6356) + 91,587,165 cycles # 1.768 GHz + 133,174,500 instructions # 1.45 insn per cycle + 0.052446048 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2084) (512y: 122) (512z: 6354) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index 1d81f994cb..6686b30b4b 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-15_12:19:41 +DATE: 2024-09-18_13:35:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.765961e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.783708e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.789350e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.784162e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.808686e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.813201e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.474700 sec +TOTAL : 0.333667 sec INFO: No Floating Point Exceptions have been reported - 1,974,719,372 cycles # 2.833 GHz - 2,899,642,626 instructions # 1.47 insn per cycle - 0.754986373 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 + 1,240,871,647 cycles # 2.848 GHz + 2,449,109,840 instructions # 1.97 insn per cycle + 0.615625688 
seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.058757e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.171730e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.179415e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.062992e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.177330e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.186889e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.484755 sec +TOTAL : 0.492547 sec INFO: No Floating Point Exceptions have been reported - 2,032,400,386 cycles # 2.878 GHz - 3,034,442,470 instructions # 1.49 insn per cycle - 0.765490241 seconds time elapsed + 2,036,892,131 cycles # 2.842 GHz + 3,016,344,751 instructions # 1.48 insn per cycle + 0.776057336 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 Avg ME (F77/GPU) = 8.1274562879405200E-006 Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.419840e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.423095e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.423095e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.396244e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.399563e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.399563e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.155760 sec +TOTAL : 0.156900 sec INFO: No Floating Point Exceptions have been reported - 467,249,665 cycles # 2.946 GHz - 1,393,566,061 instructions # 2.98 insn per cycle - 0.159156822 seconds time elapsed + 468,878,349 cycles # 2.927 GHz + 1,393,744,642 instructions # 2.97 insn per cycle + 0.160773641 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.647634e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.659890e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.659890e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.703638e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.716215e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.716215e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.080628 sec +TOTAL : 0.080002 sec INFO: No Floating Point Exceptions have been reported - 234,377,416 cycles # 2.808 GHz - 684,139,763 instructions # 2.92 insn per cycle - 0.083918243 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9361) (avx2: 0) (512y: 0) (512z: 0) + 235,588,650 cycles # 2.827 GHz + 684,259,138 instructions # 2.90 insn per cycle + 0.083821193 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9368) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.444361e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.450464e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.450464e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.433569e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.439450e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.439450e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037847 sec +TOTAL : 0.038120 sec INFO: No Floating Point Exceptions have been reported - 110,057,998 cycles # 2.704 GHz - 248,602,467 instructions # 2.26 insn per cycle - 0.041225455 seconds time elapsed + 111,841,703 cycles # 2.696 GHz + 248,650,538 instructions # 2.22 insn per cycle + 0.042017351 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8316) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.658647e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.666343e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.666343e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.614208e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.621785e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.621785e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.033122 sec +TOTAL : 0.034000 sec INFO: No Floating Point Exceptions have been reported - 97,824,445 cycles # 2.731 GHz - 229,151,030 instructions # 2.34 insn per cycle - 0.036353420 seconds time elapsed + 99,535,427 cycles # 2.668 GHz + 229,238,314 instructions # 2.30 insn per cycle + 0.037858332 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7452) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.188861e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.193842e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.193842e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.195361e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.200436e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.200436e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.046038 sec +TOTAL : 0.045690 sec INFO: No Floating Point Exceptions have been reported - 87,629,988 cycles # 1.795 GHz - 128,556,729 instructions # 1.47 insn per cycle - 0.049600721 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2035) (512y: 122) (512z: 6356) + 89,777,680 cycles # 1.821 GHz + 128,604,385 instructions # 1.43 insn per cycle + 0.049950768 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2035) (512y: 122) (512z: 6355) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 808bf6828b..62aa2351ef 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: 
Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-15_12:17:33 +DATE: 2024-09-18_13:33:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.071674e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.333003e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.756234e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.107848e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.349751e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.801252e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.519163 sec +TOTAL : 0.525011 sec INFO: No Floating Point Exceptions have been reported - 2,179,162,165 cycles # 2.882 GHz - 3,070,881,799 instructions # 1.41 insn per cycle - 0.812256060 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,191,922,668 cycles # 2.881 GHz + 3,124,854,662 instructions # 1.43 insn per cycle + 0.820527123 seconds time 
elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 Relative difference = 2.590743366698123e-07 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.736392e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.961242e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.961242e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.117531e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.040993e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.040993e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.358463 sec +TOTAL : 1.263727 sec INFO: No Floating Point Exceptions have been reported - 3,905,907,731 cycles # 2.851 GHz - 9,863,781,254 instructions # 2.53 insn per cycle - 1.371009162 seconds time elapsed + 3,735,375,700 cycles # 2.944 GHz + 9,727,971,651 instructions # 2.60 insn per cycle + 1.269703149 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.459599e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.873378e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.873378e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.512691e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.947484e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.947484e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.877453 sec +TOTAL : 0.807869 sec INFO: No Floating Point Exceptions have been reported - 2,486,018,663 cycles # 2.796 GHz - 6,068,811,134 instructions # 2.44 insn per cycle - 0.890013058 seconds time elapsed + 2,332,400,363 cycles # 2.869 GHz + 5,932,883,831 instructions # 2.54 insn per cycle + 0.813712795 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1369) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.202336e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.241982e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.241982e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.185960e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.183533e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.183533e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.633614 sec +TOTAL : 0.594661 sec INFO: No Floating Point Exceptions have been reported - 1,818,277,006 cycles # 2.816 GHz - 3,450,832,845 instructions # 1.90 insn per cycle - 0.646259584 seconds time elapsed + 1,663,371,411 cycles # 2.773 GHz + 3,314,486,720 instructions # 1.99 insn per cycle + 0.600516021 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1499) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.283565e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.391222e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.391222e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.219367e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.251513e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.251513e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.617501 sec +TOTAL : 0.587632 sec INFO: No Floating Point Exceptions have been reported - 1,780,688,704 cycles # 2.829 GHz - 3,420,263,634 instructions # 1.92 insn per cycle - 0.630172459 seconds time elapsed + 1,614,839,496 cycles # 2.724 GHz + 3,284,546,277 instructions # 2.03 insn per cycle + 0.593339482 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1375) (512y: 96) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.121818e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.051040e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.051040e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.129616e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.055946e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.055946e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.653754 sec +TOTAL : 0.606967 sec INFO: No Floating Point Exceptions have been reported - 1,527,075,900 cycles # 2.294 GHz - 2,560,289,188 instructions # 1.68 insn per cycle - 0.666212420 seconds time elapsed + 1,366,903,692 cycles # 2.234 GHz + 2,424,948,880 instructions # 1.77 insn per cycle + 0.612713832 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 580) (512y: 60) (512z: 1021) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index 06cbb3e926..239bb47b8a 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-15_12:17:46 +DATE: 2024-09-18_13:33:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.969739e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.449933e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.971179e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.181260e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.490249e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.991797e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.523956 sec +TOTAL : 0.525256 sec INFO: No Floating Point Exceptions have been reported - 2,197,273,631 cycles # 2.867 GHz - 3,116,260,127 instructions # 1.42 insn per cycle - 0.822598423 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,186,851,153 cycles # 2.864 GHz + 3,107,286,620 instructions # 1.42 insn per cycle + 0.822741231 seconds time 
elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 Relative difference = 2.590743366698123e-07 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.011162e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.031790e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.031790e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.043560e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.033362e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.033362e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.321281 sec +TOTAL : 1.273177 sec INFO: No Floating Point Exceptions have been reported - 3,893,594,822 cycles # 2.920 GHz - 9,744,555,445 instructions # 2.50 insn per cycle - 1.334263922 seconds time elapsed + 3,715,871,529 cycles # 2.906 GHz + 9,610,590,320 instructions # 2.59 insn per cycle + 1.279195540 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.381914e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.838565e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.838565e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.470593e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.877997e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.877997e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.917902 sec +TOTAL : 0.826368 sec INFO: No Floating Point Exceptions have been reported - 2,659,815,270 cycles # 2.862 GHz - 6,026,660,919 instructions # 2.27 insn per cycle - 0.930359460 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1335) (avx2: 0) (512y: 0) (512z: 0) + 2,333,894,912 cycles # 2.807 GHz + 5,878,357,831 instructions # 2.52 insn per cycle + 0.832251124 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1340) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.192597e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.249251e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.249251e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.242144e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.308218e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.308218e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.636612 sec +TOTAL : 0.580800 sec INFO: No Floating Point Exceptions have been reported - 1,830,188,885 cycles # 2.821 GHz - 3,421,758,036 instructions # 1.87 insn per cycle - 0.649438298 seconds time elapsed + 1,655,777,920 cycles # 2.827 GHz + 3,287,720,584 instructions # 1.99 insn per cycle + 0.586391271 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1436) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.271531e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.393290e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.393290e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.289151e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.391409e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.391409e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.619901 sec +TOTAL : 0.570490 sec INFO: No Floating Point Exceptions have been reported - 1,782,873,131 cycles # 2.820 GHz - 3,395,941,059 instructions # 1.90 insn per cycle - 0.632832806 seconds time elapsed + 1,622,799,576 cycles # 2.819 GHz + 3,260,934,090 instructions # 2.01 insn per cycle + 0.576408659 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1328) (512y: 96) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.134142e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.070820e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.070820e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.147175e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.094895e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.094895e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.650853 sec +TOTAL : 0.601115 sec INFO: No Floating Point Exceptions have been reported - 1,547,796,135 cycles # 2.334 GHz - 2,545,431,106 instructions # 1.64 insn per cycle - 0.664117617 seconds time elapsed + 1,376,859,663 cycles # 2.272 GHz + 2,409,979,343 instructions # 1.75 insn per cycle + 0.607114374 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 547) (512y: 60) (512z: 1007) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 6bbc9fb0da..d290e84a6a 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-15_12:17:58 +DATE: 2024-09-18_13:33:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.000684e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.992629e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.388281e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.032821e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.078089e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.480611e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 -TOTAL : 0.488697 sec +TOTAL : 0.485441 sec INFO: No Floating Point Exceptions have been reported - 2,047,674,909 cycles # 2.864 GHz - 2,921,802,724 instructions # 1.43 insn per cycle - 0.773667864 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,051,454,700 cycles # 2.873 GHz + 2,936,249,934 instructions # 1.43 insn per cycle + 0.771058253 seconds time elapsed 
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 97 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477195e-01 Avg ME (F77/GPU) = 0.14771956735057756 Relative difference = 4.559355911674916e-07 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.029870e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.042886e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.042886e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.100530e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.045913e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.045913e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 1.272870 sec +TOTAL : 1.244009 sec INFO: No Floating Point Exceptions have been reported - 3,752,313,957 cycles # 2.930 GHz - 9,659,106,684 instructions # 2.57 insn per cycle - 1.281538641 seconds time elapsed + 3,662,603,595 cycles # 2.932 GHz + 9,601,734,780 instructions # 2.62 insn per cycle + 1.249887433 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.197021e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.333033e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.333033e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.260293e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.450195e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.450195e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 0.588107 sec +TOTAL : 0.554122 sec INFO: No Floating Point Exceptions have been reported - 1,715,029,446 cycles # 2.877 GHz - 4,025,277,973 instructions # 2.35 insn per cycle - 0.596790312 seconds time elapsed + 1,637,956,120 cycles # 2.928 GHz + 3,967,181,530 instructions # 2.42 insn per cycle + 0.560033790 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1579) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955861942843 Relative difference = 2.80129187869649e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.961392e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.263984e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.263984e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.018941e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.312758e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.312758e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.463961 sec +TOTAL : 0.436114 sec INFO: No Floating Point Exceptions have been reported - 1,335,854,072 cycles # 2.831 GHz - 2,555,445,671 instructions # 1.91 insn per cycle - 0.472547002 seconds time elapsed + 1,253,193,980 cycles # 2.841 GHz + 2,497,513,333 instructions # 1.99 insn per cycle + 0.441707702 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1924) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955698961392 Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.064902e+06 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.593039e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.593039e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.105058e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.563425e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.563425e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.451960 sec +TOTAL : 0.426869 sec INFO: No Floating Point Exceptions have been reported - 1,308,601,918 cycles # 2.845 GHz - 2,529,434,362 instructions # 1.93 insn per cycle - 0.460618771 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1867) (512y: 1) (512z: 0) + 1,223,516,570 cycles # 2.834 GHz + 2,473,072,662 instructions # 2.02 insn per cycle + 0.432489185 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1870) (512y: 1) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955698961392 Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.884808e+06 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.904943e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.904943e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.875374e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.829234e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.829234e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 -TOTAL : 0.475883 sec +TOTAL : 0.455920 sec INFO: No Floating Point Exceptions have been reported - 1,154,211,341 cycles # 2.384 GHz - 2,131,381,757 instructions # 1.85 insn per cycle - 0.484642507 seconds time elapsed + 1,079,442,551 cycles # 2.341 GHz + 2,072,975,829 instructions # 1.92 insn per cycle + 0.461745309 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1011) (512y: 5) (512z: 1292) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955262403935 Relative difference = 3.207154680524219e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index 5f533fb3cd..12dbe0a7bb 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-15_12:18:10 +DATE: 2024-09-18_13:33:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.019765e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.955728e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.339790e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.057555e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.155700e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.563343e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 -TOTAL : 0.485800 sec +TOTAL : 0.485884 sec INFO: No Floating Point Exceptions have been reported - 2,046,836,184 cycles # 2.868 GHz - 2,861,763,521 instructions # 1.40 insn per cycle - 0.770456449 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,043,628,192 cycles # 2.869 GHz + 2,916,801,925 instructions # 1.43 insn per cycle + 0.771023658 seconds time elapsed 
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 86 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477195e-01 Avg ME (F77/GPU) = 0.14771956525510177 Relative difference = 4.4175008557828484e-07 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.176079e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.058039e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.058039e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.191416e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.056446e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056446e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 1.251903 sec +TOTAL : 1.229162 sec INFO: No Floating Point Exceptions have been reported - 3,703,836,740 cycles # 2.940 GHz - 9,528,821,992 instructions # 2.57 insn per cycle - 1.260572218 seconds time elapsed + 3,623,698,938 cycles # 2.936 GHz + 9,471,242,034 instructions # 2.61 insn per cycle + 1.234707648 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 367) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.192175e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.322296e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.322296e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.264406e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.455240e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.455240e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 0.587889 sec +TOTAL : 0.551418 sec INFO: No Floating Point Exceptions have been reported - 1,712,573,858 cycles # 2.874 GHz - 3,991,164,090 instructions # 2.33 insn per cycle - 0.596469979 seconds time elapsed + 1,633,608,321 cycles # 2.938 GHz + 3,933,410,721 instructions # 2.41 insn per cycle + 0.556738925 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1517) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955861942843 Relative difference = 2.80129187869649e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.984131e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.291891e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.291891e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.014495e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.293948e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.293948e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.460621 sec +TOTAL : 0.435710 sec INFO: No Floating Point Exceptions have been reported - 1,332,768,943 cycles # 2.844 GHz - 2,539,760,549 instructions # 1.91 insn per cycle - 0.469223881 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1815) (512y: 0) (512z: 0) + 1,251,845,572 cycles # 2.841 GHz + 2,481,653,408 instructions # 1.98 insn per cycle + 0.441241697 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1817) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955698961392 Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.068338e+06 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.608348e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.608348e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.125464e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.603160e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.603160e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.451660 sec +TOTAL : 0.424252 sec INFO: No Floating Point Exceptions have been reported - 1,303,705,490 cycles # 2.835 GHz - 2,516,660,988 instructions # 1.93 insn per cycle - 0.460426647 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1776) (512y: 1) (512z: 0) + 1,222,912,229 cycles # 2.849 GHz + 2,456,305,937 instructions # 2.01 insn per cycle + 0.429917564 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1773) (512y: 1) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955698961392 Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.904674e+06 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.952335e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.952335e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.934438e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.010088e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.010088e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 -TOTAL : 0.470523 sec +TOTAL : 0.446349 sec INFO: No Floating Point Exceptions have been reported - 1,148,816,748 cycles # 2.401 GHz - 2,115,600,264 instructions # 1.84 insn per cycle - 0.478989217 seconds time elapsed + 1,072,792,214 cycles # 2.378 GHz + 2,057,138,403 instructions # 1.92 insn per cycle + 0.451920157 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 906) (512y: 5) (512z: 1273) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955262403935 Relative difference = 3.207154680524219e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 45ada3a90e..8f7e2917bf 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-15_12:18:21 +DATE: 2024-09-18_13:33:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.081665e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.353918e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.800720e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.090014e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.319571e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.751667e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.522074 sec +TOTAL : 0.522308 sec INFO: No Floating Point Exceptions have been reported - 2,188,417,927 cycles # 2.886 GHz - 3,109,980,535 instructions # 1.42 insn per cycle - 0.814803667 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,179,170,623 cycles # 2.882 GHz + 3,109,984,327 instructions # 1.43 insn per cycle + 0.814646692 seconds time elapsed 
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 Relative difference = 2.5810037581511336e-07 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.910495e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.017920e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.017920e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.952529e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.022459e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.022459e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.334640 sec +TOTAL : 1.284601 sec INFO: No Floating Point Exceptions have been reported - 3,942,717,867 cycles # 2.929 GHz - 9,888,397,619 instructions # 2.51 insn per cycle - 1.346816311 seconds time elapsed + 3,782,838,045 cycles # 2.933 GHz + 9,753,328,321 instructions # 2.58 insn per cycle + 1.290389924 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.550381e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.026476e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.026476e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.563360e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.027715e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027715e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.835395 sec +TOTAL : 0.783750 sec INFO: No Floating Point Exceptions have been reported - 2,474,407,927 cycles # 2.921 GHz - 6,051,781,084 instructions # 2.45 insn per cycle - 0.847852996 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1410) (avx2: 0) (512y: 0) (512z: 0) + 2,313,452,686 cycles # 2.933 GHz + 5,920,736,181 instructions # 2.56 insn per cycle + 0.789531453 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1412) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.251869e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.352067e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.352067e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.274544e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.372577e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.372577e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.625126 sec +TOTAL : 0.574756 sec INFO: No Floating Point Exceptions have been reported - 1,795,351,792 cycles # 2.819 GHz - 3,389,782,871 instructions # 1.89 insn per cycle - 0.637929251 seconds time elapsed + 1,639,105,587 cycles # 2.827 GHz + 3,253,580,218 instructions # 1.98 insn per cycle + 0.580508158 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1567) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.324637e+06 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.489814e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.489814e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.338032e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.481810e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.481810e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.609151 sec +TOTAL : 0.561846 sec INFO: No Floating Point Exceptions have been reported - 1,759,711,411 cycles # 2.834 GHz - 3,345,109,138 instructions # 1.90 insn per cycle - 0.621588850 seconds time elapsed + 1,602,124,528 cycles # 2.826 GHz + 3,209,983,521 instructions # 2.00 insn per cycle + 0.567621873 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1446) (512y: 101) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.155541e+06 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.119089e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.119089e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.198566e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.176156e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.176156e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.645680 sec +TOTAL : 0.590094 sec INFO: No Floating Point Exceptions have been reported - 1,520,276,942 cycles # 2.311 GHz - 2,512,095,426 instructions # 1.65 insn per cycle - 0.658351218 seconds time elapsed + 1,347,708,343 cycles # 2.265 GHz + 2,376,834,038 instructions # 1.76 insn per cycle + 0.595752442 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 768) (512y: 64) (512z: 1063) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index 635fef145f..856901d743 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-15_12:18:34 +DATE: 2024-09-18_13:34:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.163952e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.460728e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.975996e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.212511e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.510212e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.023434e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.521235 sec +TOTAL : 0.523683 sec INFO: No Floating Point Exceptions have been reported - 2,178,235,183 cycles # 2.877 GHz - 3,088,126,574 instructions # 1.42 insn per cycle - 0.814490194 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,161,000,888 cycles # 2.849 GHz + 3,093,780,518 instructions # 1.43 insn per cycle + 0.816657446 seconds time elapsed 
+runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 Relative difference = 2.5810037581511336e-07 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.905010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.017534e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.017534e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.006386e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.027076e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.027076e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.334358 sec +TOTAL : 1.275858 sec INFO: No Floating Point Exceptions have been reported - 3,930,631,045 cycles # 2.921 GHz - 9,778,615,750 instructions # 2.49 insn per cycle - 1.346795690 seconds time elapsed + 3,759,691,883 cycles # 2.936 GHz + 9,643,680,583 instructions # 2.57 insn per cycle + 1.281474685 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.520527e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.978156e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.978156e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.517196e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.947819e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.947819e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.848228 sec +TOTAL : 0.804148 sec INFO: No Floating Point Exceptions have been reported - 2,460,773,168 cycles # 2.862 GHz - 5,993,984,003 instructions # 2.44 insn per cycle - 0.860657174 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1368) (avx2: 0) (512y: 0) (512z: 0) + 2,322,905,849 cycles # 2.871 GHz + 5,850,527,655 instructions # 2.52 insn per cycle + 0.809789330 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1371) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.233001e+06 ) 
sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.325225e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.325225e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.254780e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.333242e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.333242e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.627322 sec +TOTAL : 0.577840 sec INFO: No Floating Point Exceptions have been reported - 1,810,072,132 cycles # 2.832 GHz - 3,352,499,816 instructions # 1.85 insn per cycle - 0.639697989 seconds time elapsed + 1,650,198,876 cycles # 2.831 GHz + 3,216,570,367 instructions # 1.95 insn per cycle + 0.583563842 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1483) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.331445e+06 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.508166e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.508166e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.314025e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.454653e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.454653e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.606762 sec +TOTAL : 0.565918 sec INFO: No Floating Point Exceptions have been reported - 1,747,202,335 cycles # 2.825 GHz - 3,316,993,487 instructions # 1.90 insn per cycle - 0.619170203 seconds time elapsed + 1,600,538,363 cycles # 2.803 GHz + 3,181,550,003 instructions # 1.99 insn per cycle + 0.571587963 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1382) (512y: 101) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.144157e+06 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.097547e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.097547e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.185908e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.142994e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.142994e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.647875 sec +TOTAL : 0.592175 sec INFO: No Floating Point Exceptions have been reported - 1,527,422,709 cycles # 2.315 GHz - 2,496,191,682 instructions # 1.63 insn per cycle - 0.660479795 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 716) (512y: 64) (512z: 1054) + 1,356,716,498 cycles # 2.272 GHz + 2,361,264,569 instructions # 1.74 insn per cycle + 0.597815792 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 716) (512y: 64) (512z: 1056) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 0f0996a4b7..99516e3f65 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_12:15:09 +DATE: 2024-09-18_13:30:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.705596e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.093007e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.806904e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.206537e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.286021e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.966943e+07 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.547454 sec +TOTAL : 0.539704 sec INFO: No Floating Point Exceptions have been reported - 2,244,643,125 cycles # 2.856 GHz - 3,194,753,552 instructions # 1.42 insn per cycle - 0.844381889 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,208,534,555 cycles # 2.845 GHz + 3,150,536,398 instructions # 1.43 insn per cycle + 0.835623159 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358666195562 Relative difference = 6.616631711254798e-08 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.817695e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.865026e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.865026e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.822612e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.869779e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.869779e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.917395 sec +TOTAL : 5.861835 sec INFO: No Floating Point Exceptions have been reported - 17,420,747,172 cycles # 2.939 GHz - 46,039,408,535 instructions # 2.64 insn per cycle - 5.929443281 seconds time elapsed + 17,248,615,219 cycles # 2.940 GHz + 45,920,744,006 instructions # 2.66 insn per cycle + 5.867505238 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194407 Relative difference = 6.616637439061751e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.177458e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.337417e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.337417e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.157644e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.314617e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.314617e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.449474 sec +TOTAL : 3.430104 sec INFO: No Floating Point Exceptions have been reported - 10,167,811,545 cycles # 2.940 GHz - 27,922,488,818 instructions # 2.75 insn per cycle - 3.461267593 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2533) (avx2: 0) (512y: 0) (512z: 0) + 10,035,725,674 cycles # 2.922 GHz + 27,802,903,324 instructions # 2.77 insn per cycle + 3.435933108 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2537) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.001375e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
5.394642e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.394642e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.941289e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.318652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.318652e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.246102 sec +TOTAL : 2.231098 sec INFO: No Floating Point Exceptions have been reported - 6,225,214,133 cycles # 2.758 GHz - 12,703,481,596 instructions # 2.04 insn per cycle - 2.257992148 seconds time elapsed + 6,101,804,369 cycles # 2.729 GHz + 12,586,990,350 instructions # 2.06 insn per cycle + 2.237005738 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2620) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.452222e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
5.920256e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.920256e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.519324e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.987161e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.987161e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.070361 sec +TOTAL : 2.007245 sec INFO: No Floating Point Exceptions have been reported - 5,740,692,800 cycles # 2.758 GHz - 12,120,362,498 instructions # 2.11 insn per cycle - 2.082196362 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2363) (512y: 144) (512z: 0) + 5,563,695,868 cycles # 2.765 GHz + 12,000,166,171 instructions # 2.16 insn per cycle + 2.013040788 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2365) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.496236e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
3.681187e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.681187e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.502694e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.684435e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.684435e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.148263 sec +TOTAL : 3.101349 sec INFO: No Floating Point Exceptions have been reported - 5,893,231,770 cycles # 1.865 GHz - 8,460,083,225 instructions # 1.44 insn per cycle - 3.160116132 seconds time elapsed + 5,749,698,258 cycles # 1.851 GHz + 8,343,640,860 instructions # 1.45 insn per cycle + 3.107135736 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1468) (512y: 122) (512z: 1806) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index b863aa4b8d..1f4bfaf624 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_12:15:34 +DATE: 2024-09-18_13:31:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.254248e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.331434e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.002046e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.340722e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.356922e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.992900e+07 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.532593 sec +TOTAL : 0.532286 sec INFO: No Floating Point Exceptions have been reported - 2,206,521,572 cycles # 2.874 GHz - 3,181,038,873 instructions # 1.44 insn per cycle - 0.824867346 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,205,060,845 cycles # 2.868 GHz + 3,167,717,935 instructions # 1.44 insn per cycle + 0.825884785 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358666195562 Relative difference = 6.616631711254798e-08 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.847951e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.897550e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.897550e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.873402e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.922894e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.922894e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.824198 sec +TOTAL : 5.705020 sec INFO: No Floating Point Exceptions have been reported - 17,074,611,956 cycles # 2.927 GHz - 45,037,522,622 instructions # 2.64 insn per cycle - 5.835488505 seconds time elapsed + 16,751,892,515 cycles # 2.934 GHz + 44,906,929,991 instructions # 2.68 insn per cycle + 5.710885629 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 566) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.339846e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.517622e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.517622e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.361567e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.536177e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.536177e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.286951 sec +TOTAL : 3.227440 sec INFO: No Floating Point Exceptions have been reported - 9,688,702,526 cycles # 2.938 GHz - 26,805,473,197 instructions # 2.77 insn per cycle - 3.298888236 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2327) (avx2: 0) (512y: 0) (512z: 0) + 9,512,762,540 cycles # 2.943 GHz + 26,678,539,109 instructions # 2.80 insn per cycle + 3.233163450 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2326) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.556976e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.882070e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.882070e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.604596e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.927835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.927835e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.448521 sec +TOTAL : 2.385363 sec INFO: No Floating Point Exceptions have been reported - 6,771,268,311 cycles # 2.753 GHz - 14,227,806,494 instructions # 2.10 insn per cycle - 2.460277833 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2704) (512y: 0) (512z: 0) + 6,599,025,301 cycles # 2.760 GHz + 14,108,971,201 instructions # 2.14 insn per cycle + 2.391489598 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2705) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.776530e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
5.130549e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.130549e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.791684e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.138771e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.138771e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.341767 sec +TOTAL : 2.294784 sec INFO: No Floating Point Exceptions have been reported - 6,488,711,878 cycles # 2.758 GHz - 13,822,301,429 instructions # 2.13 insn per cycle - 2.353629315 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2355) (512y: 297) (512z: 0) + 6,350,789,081 cycles # 2.762 GHz + 13,712,967,214 instructions # 2.16 insn per cycle + 2.300513281 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2356) (512y: 298) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.365014e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
3.535721e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.535721e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.371675e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.540530e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.540530e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.263382 sec +TOTAL : 3.217988 sec INFO: No Floating Point Exceptions have been reported - 6,085,804,948 cycles # 1.859 GHz - 10,219,161,569 instructions # 1.68 insn per cycle - 3.275179492 seconds time elapsed + 5,939,821,646 cycles # 1.843 GHz + 10,101,817,070 instructions # 1.70 insn per cycle + 3.223668588 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1318) (512y: 208) (512z: 1986) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index cf83c07d47..1a672b74ce 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_12:16:00 +DATE: 2024-09-18_13:31:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.223710e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.732879e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.860967e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.264093e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.766977e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.882650e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 -TOTAL : 0.489964 sec +TOTAL : 0.489615 sec INFO: No Floating Point Exceptions have been reported - 2,041,886,975 cycles # 2.853 GHz - 2,932,689,889 instructions # 1.44 insn per cycle - 0.773926194 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,060,695,462 cycles # 2.874 GHz + 2,961,708,283 instructions # 1.44 insn per cycle + 0.774445109 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015841e+00 Avg ME (F77/GPU) = 2.0158787037944421 Relative difference = 1.870375413642407e-05 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.929290e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.984521e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.984521e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.937524e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.992418e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.992418e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 -TOTAL : 5.538462 sec +TOTAL : 5.498876 sec INFO: No Floating Point Exceptions have been reported - 16,282,391,613 cycles # 2.936 GHz - 45,369,954,990 instructions # 2.79 insn per cycle - 5.546087919 seconds time elapsed + 16,211,815,789 cycles # 2.946 GHz + 45,319,917,505 instructions # 2.80 insn per cycle + 5.504546294 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 Avg ME (F77/C++) = 2.0158491701586172 Relative difference = 8.441039850630506e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.517678e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.857555e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.857555e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.533229e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.869354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.869354e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 -TOTAL : 2.425578 sec +TOTAL : 2.401545 sec INFO: No Floating Point Exceptions have been reported - 7,146,342,805 cycles # 2.938 GHz - 17,820,817,556 instructions # 2.49 insn per cycle - 2.433499088 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3136) (avx2: 0) (512y: 0) (512z: 0) + 7,056,760,375 cycles # 2.932 GHz + 17,791,878,594 instructions # 2.52 insn per cycle + 2.407391534 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3147) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 Avg ME (F77/C++) = 2.0158486895961687 Relative difference = 1.539816876576819e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.300139e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
9.447005e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.447005e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.087610e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.152694e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.152694e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.366410 sec +TOTAL : 1.383748 sec INFO: No Floating Point Exceptions have been reported - 3,812,530,133 cycles # 2.776 GHz - 8,314,531,864 instructions # 2.18 insn per cycle - 1.374237525 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3369) (512y: 0) (512z: 0) + 3,839,977,803 cycles # 2.765 GHz + 8,262,037,377 instructions # 2.15 insn per cycle + 1.389311013 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3371) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015847e+00 Avg ME (F77/C++) = 2.0158474864438176 Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.645853e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
9.897799e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.897799e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.847495e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.011837e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.011837e+06 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.316285 sec +TOTAL : 1.271772 sec INFO: No Floating Point Exceptions have been reported - 3,675,425,998 cycles # 2.778 GHz - 7,974,219,247 instructions # 2.17 insn per cycle - 1.323972787 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3213) (512y: 20) (512z: 0) + 3,548,498,858 cycles # 2.779 GHz + 7,914,474,526 instructions # 2.23 insn per cycle + 1.277559305 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3214) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015847e+00 Avg ME (F77/C++) = 2.0158474864438176 Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.513578e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 7.178216e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.178216e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.536546e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.195032e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.195032e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.714470 sec +TOTAL : 1.690671 sec INFO: No Floating Point Exceptions have been reported - 3,315,579,741 cycles # 1.925 GHz - 6,150,343,295 instructions # 1.85 insn per cycle - 1.722857238 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2256) (512y: 24) (512z: 2156) + 3,256,995,213 cycles # 1.921 GHz + 6,100,882,884 instructions # 1.87 insn per cycle + 1.696260075 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2258) (512y: 22) (512z: 2156) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015848e+00 Avg ME (F77/C++) = 2.0158476348733529 Relative difference = 1.8112806478434436e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index 60f2dad34a..d3b2f0408f 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_12:16:21 +DATE: 2024-09-18_13:31:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.969923e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.737348e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.863422e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.208288e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.783701e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.898530e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 -TOTAL : 0.488943 sec +TOTAL : 0.487345 sec INFO: No Floating Point Exceptions have been reported - 2,058,928,554 cycles # 2.875 GHz - 2,909,617,560 instructions # 1.41 insn per cycle - 0.773317754 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,044,938,895 cycles # 2.858 GHz + 2,894,501,323 instructions # 1.42 insn per cycle + 0.773252899 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015841e+00 Avg ME (F77/GPU) = 2.0158787037944421 Relative difference = 1.870375413642407e-05 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.962212e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.019252e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.019252e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.963294e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.019360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019360e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 -TOTAL : 5.446348 sec +TOTAL : 5.426510 sec INFO: No Floating Point Exceptions have been reported - 16,014,544,982 cycles # 2.937 GHz - 44,474,347,041 instructions # 2.78 insn per cycle - 5.454124254 seconds time elapsed + 15,955,926,327 cycles # 2.938 GHz + 44,427,771,107 instructions # 2.78 insn per cycle + 5.431874949 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 533) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 Avg ME (F77/C++) = 2.0158491701586172 Relative difference = 8.441039850630506e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.286669e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.759924e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.759924e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.335493e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.807156e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.807156e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 -TOTAL : 2.086643 sec +TOTAL : 2.051564 sec INFO: No Floating Point Exceptions have been reported - 6,135,728,749 cycles # 2.931 GHz - 17,120,648,230 instructions # 2.79 insn per cycle - 2.094524948 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2863) (avx2: 0) (512y: 0) (512z: 0) + 6,058,187,563 cycles # 2.946 GHz + 17,074,725,200 instructions # 2.82 insn per cycle + 2.057140058 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2862) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 Avg ME (F77/C++) = 2.0158486895961687 Relative difference = 1.539816876576819e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.052770e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
6.643067e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.643067e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.066914e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.644109e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.644109e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.835441 sec +TOTAL : 1.813330 sec INFO: No Floating Point Exceptions have been reported - 5,101,873,696 cycles # 2.769 GHz - 10,273,156,361 instructions # 2.01 insn per cycle - 1.843297684 seconds time elapsed + 5,026,891,048 cycles # 2.765 GHz + 10,223,175,449 instructions # 2.03 insn per cycle + 1.818918027 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3906) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015847e+00 Avg ME (F77/C++) = 2.0158474864438176 Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.133897e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
6.741009e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.741009e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.155601e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.742490e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.742490e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.812216 sec +TOTAL : 1.789620 sec INFO: No Floating Point Exceptions have been reported - 5,041,846,676 cycles # 2.771 GHz - 10,042,915,318 instructions # 1.99 insn per cycle - 1.820042823 seconds time elapsed + 4,970,225,584 cycles # 2.770 GHz + 9,994,978,881 instructions # 2.01 insn per cycle + 1.795236203 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3805) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015847e+00 Avg ME (F77/C++) = 2.0158474864438176 Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.642094e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = 
( 4.969408e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.969408e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.666448e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.992729e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.992729e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 2.363033 sec +TOTAL : 2.333237 sec INFO: No Floating Point Exceptions have been reported - 4,430,997,247 cycles # 1.870 GHz - 8,493,309,798 instructions # 1.92 insn per cycle - 2.370917653 seconds time elapsed + 4,367,486,322 cycles # 1.868 GHz + 8,444,271,998 instructions # 1.93 insn per cycle + 2.338821094 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2744) (512y: 4) (512z: 2754) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015848e+00 Avg ME (F77/C++) = 2.0158476348733529 Relative difference = 1.8112806478434436e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 62fab95ac2..c1f4bb8132 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_12:16:43 +DATE: 2024-09-18_13:32:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.370448e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.371457e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.004604e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.373966e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.408476e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005223e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.536026 sec +TOTAL : 0.533114 sec INFO: No Floating Point Exceptions have been reported - 2,214,409,122 cycles # 2.867 GHz - 3,163,292,335 instructions # 1.43 insn per cycle - 0.830149622 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,212,396,057 cycles # 2.876 GHz + 3,189,695,931 instructions # 1.44 insn per cycle + 0.826417249 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358639104246 Relative difference = 6.751024171044779e-08 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.787519e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.833765e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.833765e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.812942e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.859362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.859362e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 6.014473 sec +TOTAL : 5.891473 sec INFO: No Floating Point Exceptions have been reported - 17,675,190,561 cycles # 2.934 GHz - 46,198,484,525 instructions # 2.61 insn per cycle - 6.025789457 seconds time elapsed + 17,373,992,128 cycles # 2.947 GHz + 46,072,043,013 instructions # 2.65 insn per cycle + 5.897196721 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.209372e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.373008e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.373008e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.226094e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.386425e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.386425e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.414727 sec +TOTAL : 3.358561 sec INFO: No Floating Point Exceptions have been reported - 10,062,586,014 cycles # 2.937 GHz - 27,715,049,037 instructions # 2.75 insn per cycle - 3.427097999 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) + 9,911,091,884 cycles # 2.947 GHz + 27,587,758,232 instructions # 2.78 insn per cycle + 3.364358964 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.030622e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
5.429639e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.429639e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.044961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.439076e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.439076e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.231035 sec +TOTAL : 2.186755 sec INFO: No Floating Point Exceptions have been reported - 6,157,448,048 cycles # 2.747 GHz - 12,606,647,104 instructions # 2.05 insn per cycle - 2.242669652 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2777) (512y: 0) (512z: 0) + 6,022,763,481 cycles # 2.748 GHz + 12,488,130,017 instructions # 2.07 insn per cycle + 2.192467039 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.510413e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
5.987462e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.987462e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.596506e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.079685e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.079685e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.049793 sec +TOTAL : 1.980331 sec INFO: No Floating Point Exceptions have been reported - 5,651,790,254 cycles # 2.742 GHz - 12,043,922,780 instructions # 2.13 insn per cycle - 2.061986198 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2522) (512y: 146) (512z: 0) + 5,504,974,873 cycles # 2.773 GHz + 11,923,154,801 instructions # 2.17 insn per cycle + 1.986372291 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2521) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.559881e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
3.752268e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.752268e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.610025e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.802161e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.802161e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.093339 sec +TOTAL : 3.013333 sec INFO: No Floating Point Exceptions have been reported - 5,777,925,002 cycles # 1.861 GHz - 8,230,989,757 instructions # 1.42 insn per cycle - 3.105063126 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1866) + 5,617,715,088 cycles # 1.861 GHz + 8,110,898,143 instructions # 1.44 insn per cycle + 3.019371634 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1865) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index bde416a886..744bfec9d4 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-15_12:17:08 +DATE: 2024-09-18_13:32:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.300920e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.324653e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.961570e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.356227e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.388949e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002637e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.533607 sec +TOTAL : 0.530171 sec INFO: No Floating Point Exceptions have been reported - 2,198,649,887 cycles # 2.849 GHz - 3,081,934,025 instructions # 1.40 insn per cycle - 0.827713906 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,205,062,942 cycles # 2.875 GHz + 3,154,626,469 instructions # 1.43 insn per cycle + 0.823696592 seconds time elapsed +runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358639104246 Relative difference = 6.751024171044779e-08 OK (relative difference <= 5E-3) 
========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.850472e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.899391e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.899391e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.861428e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.909561e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.909561e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.814253 sec +TOTAL : 5.739728 sec INFO: No Floating Point Exceptions have been reported - 17,118,477,102 cycles # 2.939 GHz - 45,207,445,046 instructions # 2.64 insn per cycle - 5.826249043 seconds time elapsed + 16,938,834,117 cycles # 2.949 GHz + 45,091,140,717 instructions # 2.66 insn per cycle + 5.745446347 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.320488e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.495447e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.495447e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.325491e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.496074e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.496074e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.305181 sec +TOTAL : 3.260792 sec INFO: No Floating Point Exceptions have been reported - 9,752,287,704 cycles # 2.941 GHz - 26,369,462,343 instructions # 2.70 insn per cycle - 3.316567159 seconds time elapsed + 9,505,160,256 cycles # 2.910 GHz + 26,249,919,899 instructions # 2.76 insn per cycle + 3.266614954 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.466984e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
4.783441e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.783441e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.459875e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.763541e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.763541e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.496548 sec +TOTAL : 2.458814 sec INFO: No Floating Point Exceptions have been reported - 6,902,736,140 cycles # 2.753 GHz - 14,146,955,352 instructions # 2.05 insn per cycle - 2.508688639 seconds time elapsed + 6,750,977,111 cycles # 2.740 GHz + 14,029,286,718 instructions # 2.08 insn per cycle + 2.464538527 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.747061e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
5.095585e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.095585e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.781257e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.129375e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.129375e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.355880 sec +TOTAL : 2.300139 sec INFO: No Floating Point Exceptions have been reported - 6,536,932,805 cycles # 2.762 GHz - 13,633,905,312 instructions # 2.09 insn per cycle - 2.367915662 seconds time elapsed + 6,382,631,497 cycles # 2.769 GHz + 13,515,067,929 instructions # 2.12 insn per cycle + 2.305941749 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2531) (512y: 302) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.589352e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
3.785622e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.785622e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.602901e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.797238e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.797238e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.069165 sec +TOTAL : 3.017121 sec INFO: No Floating Point Exceptions have been reported - 5,741,871,289 cycles # 1.864 GHz - 9,325,593,834 instructions # 1.62 insn per cycle - 3.081760977 seconds time elapsed + 5,589,518,345 cycles # 1.850 GHz + 9,206,594,679 instructions # 1.65 insn per cycle + 3.022936699 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2059) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 From 5439b7d450162d78fef37f4c5cd9a7fd3b6379de Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 18 Sep 2024 17:51:26 +0200 Subject: [PATCH 58/76] [clang] ** COMPLETE CLANG ** rerun 30 tmad tests on itscrd90 - all as expected STARTED AT Wed Sep 18 01:38:02 PM CEST 2024 (SM tests) ENDED(1) AT Wed Sep 18 05:31:59 PM CEST 2024 [Status=0] (BSM tests) ENDED(1) AT Wed Sep 18 05:42:22 PM CEST 2024 [Status=0] 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 24 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt 1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt 24 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt --- .../log_eemumu_mad_d_inl0_hrd0.txt | 158 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0.txt | 160 +++++++-------- .../log_eemumu_mad_m_inl0_hrd0.txt | 164 +++++++-------- .../log_ggtt_mad_d_inl0_hrd0.txt | 168 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd0.txt | 166 ++++++++-------- .../log_ggtt_mad_m_inl0_hrd0.txt | 180 ++++++++--------- .../log_ggttg_mad_d_inl0_hrd0.txt | 178 ++++++++--------- .../log_ggttg_mad_f_inl0_hrd0.txt | 174 ++++++++-------- .../log_ggttg_mad_m_inl0_hrd0.txt | 176 ++++++++-------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 174 ++++++++-------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 176 ++++++++-------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 178 ++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 184 ++++++++--------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 182 ++++++++--------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 188 +++++++++--------- .../log_gqttq_mad_d_inl0_hrd0.txt | 164 +++++++-------- .../log_gqttq_mad_f_inl0_hrd0.txt | 172 ++++++++-------- .../log_gqttq_mad_m_inl0_hrd0.txt | 168 ++++++++-------- .../log_heftggbb_mad_d_inl0_hrd0.txt | 160 
+++++++-------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 58 +++--- .../log_heftggbb_mad_m_inl0_hrd0.txt | 164 +++++++-------- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 170 ++++++++-------- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 172 ++++++++-------- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 172 ++++++++-------- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 168 ++++++++-------- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 160 +++++++-------- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 174 ++++++++-------- .../log_susyggtt_mad_d_inl0_hrd0.txt | 164 +++++++-------- .../log_susyggtt_mad_f_inl0_hrd0.txt | 162 +++++++-------- .../log_susyggtt_mad_m_inl0_hrd0.txt | 172 ++++++++-------- 30 files changed, 2503 insertions(+), 2503 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 0c0dfe85e2..a32be077f9 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make 
USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-15_12:24:25 +DATE: 2024-09-18_13:40:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,8 +58,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7657s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7582s + [COUNTERS] PROGRAM TOTAL : 0.7474s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7399s [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] 
Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2200s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2125s - [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2197s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2120s + [COUNTERS] Fortran MEs ( 1 ) : 0.0076s for 8192 events => throughput is 1.07E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7343s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6595s - [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 81920 events => throughput is 1.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7224s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6476s + [COUNTERS] Fortran MEs ( 1 ) : 0.0748s for 81920 events => throughput is 1.10E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2319s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2242s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0074s for 8192 events => throughput is 1.11E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2136s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0071s for 8192 events => throughput is 1.15E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 
1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7234s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6509s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0723s for 81920 events => throughput is 1.13E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7334s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6611s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0720s for 81920 events => throughput is 1.14E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.158974e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.150298e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.174895e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.170213e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2302s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2254s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.81E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2169s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2122s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.87E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 
0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6926s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6487s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0436s for 81920 events => throughput is 1.88E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7002s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6558s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0441s for 81920 events => throughput is 1.86E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.953684e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.910014e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.997304e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.998657e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2278s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2239s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.27E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2169s + [COUNTERS] 
Fortran Overhead ( 0 ) : 0.2132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.40E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6872s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6539s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 81920 events => throughput is 2.48E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6805s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6468s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0335s for 81920 events => throughput is 2.45E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.566849e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.599120e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.686525e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.638604e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2327s - [COUNTERS] Fortran Overhead ( 0 ) 
: 0.2291s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2150s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2114s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.46E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6890s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6567s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 81920 events => throughput is 2.55E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6827s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6499s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0325s for 81920 events => throughput is 2.52E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.680421e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.664038e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.822232e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.739981e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2235s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2189s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.94E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2192s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2148s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 2.02E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6871s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6460s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 81920 events => throughput is 2.00E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.6880s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6476s + [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0401s for 81920 events => throughput is 2.05E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.142328e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.061679e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.216839e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.165474e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.6648s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6609s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6554s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6518s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 1.1813s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1726s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 81920 
events => throughput is 1.01E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.0937s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0854s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 81920 events => throughput is 1.06E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.861787e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.180467e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.181397e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.444487e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.226375e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.131686e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.783312e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.605423e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.210512e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.162553e+07 ) 
sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.795717e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.757987e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.200044e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.185886e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159916e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.069069e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index a35b30e9f9..d760c23b34 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-15_12:24:44 +DATE: 2024-09-18_13:40:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7477s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7400s - [COUNTERS] Fortran MEs ( 1 ) : 0.0076s for 8192 events => throughput is 1.07E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7432s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7356s + [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2210s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2132s - [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2222s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2147s + [COUNTERS] Fortran MEs ( 1 ) : 0.0076s for 8192 events => throughput is 1.08E+06 
events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7367s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6615s - [COUNTERS] Fortran MEs ( 1 ) : 0.0752s for 81920 events => throughput is 1.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7385s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6605s + [COUNTERS] Fortran MEs ( 1 ) : 0.0780s for 81920 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432777382586498E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2351s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2281s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 8192 events => throughput is 1.21E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2258s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2183s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.13E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711091925143637E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7204s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6521s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0681s for 81920 events => throughput is 1.20E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7135s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6453s + [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0680s for 81920 events => throughput is 1.20E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.224241e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.221041e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.240015e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.228624e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432774839452045E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2276s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2246s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.91E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2147s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2117s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.95E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711089416628339E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6836s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6563s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 81920 
events => throughput is 3.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6780s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6503s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0275s for 81920 events => throughput is 2.98E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.177757e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.101743e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.232799e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.221746e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2259s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2232s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2192s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2165s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.25E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events 
(found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6517s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 81920 events => throughput is 3.27E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6719s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6467s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0250s for 81920 events => throughput is 3.28E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.494973e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.474277e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.587728e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.584047e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2346s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2319s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.20E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2174s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2146s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.25E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 81920 events across 2 
channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6790s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6545s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0243s for 81920 events => throughput is 3.37E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6748s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6505s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0241s for 81920 events => throughput is 3.40E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.557503e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.456987e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.521678e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.708350e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,8 +454,8 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432778556608516E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2290s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2260s + [COUNTERS] PROGRAM TOTAL : 0.2238s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2208s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.95E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s @@ -489,9 +489,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 
: 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711093118690828E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6805s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6543s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0261s for 81920 events => throughput is 3.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6837s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6578s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0257s for 81920 events => throughput is 3.19E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.370852e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.378249e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.638790e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.571882e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432780016531851E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.6536s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6499s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.56E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6559s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6524s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.77E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 
0.0005s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711094767039689E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 1.1007s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0925s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 81920 events => throughput is 1.07E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.0956s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0874s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 81920 events => throughput is 1.07E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.990359e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.223914e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.155049e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.489581e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.169145e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.046619e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.016803e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.917172e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.172418e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.064257e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.059728e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.895930e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.911760e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.656385e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.715158e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.636564e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 078c01c46b..3678e8e364 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum make USEBUILDDIR=1 BACKEND=cuda + + + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-15_12:25:03 +DATE: 2024-09-18_13:41:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7519s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7443s - [COUNTERS] Fortran MEs ( 1 ) : 0.0076s for 8192 events => throughput is 1.08E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7466s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7391s + [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2218s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2141s - [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.07E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2178s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2101s + [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.06E+06 
events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7304s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6547s - [COUNTERS] Fortran MEs ( 1 ) : 0.0757s for 81920 events => throughput is 1.08E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7260s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6506s + [COUNTERS] Fortran MEs ( 1 ) : 0.0754s for 81920 events => throughput is 1.09E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2209s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2132s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.10E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.2203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2127s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.13E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7244s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6512s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0730s for 81920 events => throughput is 1.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7363s + [COUNTERS] Fortran Overhead ( 0 ) 
: 0.6616s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0744s for 81920 events => throughput is 1.10E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.127620e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.133764e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.149889e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.147681e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2170s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2124s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0043s for 8192 events => throughput is 1.91E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2195s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2148s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.88E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7028s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6603s - [COUNTERS] 
CudaCpp MEs ( 2 ) : 0.0422s for 81920 events => throughput is 1.94E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6934s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6496s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0435s for 81920 events => throughput is 1.88E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.010964e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.996644e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.042848e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.048925e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2175s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2138s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.37E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.2177s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2140s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.44E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross 
section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6838s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6506s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 81920 events => throughput is 2.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6893s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6561s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 81920 events => throughput is 2.49E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.582216e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.590922e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.715007e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.665063e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2166s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2132s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.62E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2169s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2133s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.50E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to 
MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6865s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0327s for 81920 events => throughput is 2.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6867s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6530s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0334s for 81920 events => throughput is 2.45E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.595546e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.604540e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.748668e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.589532e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2172s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2131s + [COUNTERS] PROGRAM TOTAL : 0.2176s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2134s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s - [COUNTERS] CudaCpp 
HEL ( 3 ) : 0.0002s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6870s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6485s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0382s for 81920 events => throughput is 2.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6977s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6595s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0378s for 81920 events => throughput is 2.17E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.209077e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.201898e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.339473e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.266955e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,8 +534,8 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789437826970E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.6512s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6475s + [COUNTERS] PROGRAM TOTAL : 0.6541s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6503s 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103901050417E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 1.0962s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0880s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 81920 events => throughput is 1.06E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.0910s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0820s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 81920 events => throughput is 9.79E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.884543e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.081337e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.177546e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.286137e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.231479e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.251289e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.854991e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.774363e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.235273e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.280291e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.864709e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.840047e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.202609e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.258897e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.123678e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.124478e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 96c9821e5a..21d2f45edf 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-15_12:25:22 +DATE: 2024-09-18_13:41:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.8743s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8316s - [COUNTERS] Fortran MEs ( 1 ) : 0.0427s for 8192 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8485s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8070s + [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4675s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4254s - [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4498s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4077s + [COUNTERS] Fortran MEs ( 1 ) : 0.0420s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN 
x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 2.0472s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6157s - [COUNTERS] Fortran MEs ( 1 ) : 0.4316s for 81920 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9562s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5349s + [COUNTERS] Fortran MEs ( 1 ) : 0.4212s for 81920 events => throughput is 1.94E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4948s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4483s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0461s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4516s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4062s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0450s for 8192 events => throughput is 1.82E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 2.0844s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6223s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4617s for 81920 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9866s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5393s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4468s for 81920 events => throughput is 1.83E+05 events/s 
[COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.819761e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.851914e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.831048e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.872591e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4726s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4464s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0258s for 8192 events => throughput is 3.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4375s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4102s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0269s for 8192 events => throughput is 3.04E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.8861s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6258s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2599s for 81920 events => throughput is 3.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7905s + 
[COUNTERS] Fortran Overhead ( 0 ) : 1.5386s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2515s for 81920 events => throughput is 3.26E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.274415e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.305635e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.309936e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.371218e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4481s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4320s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0158s for 8192 events => throughput is 5.17E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4200s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4039s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0158s for 8192 events => throughput is 5.19E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7336s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.5761s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1571s for 81920 events => throughput is 5.21E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.7022s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5439s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1579s for 81920 events => throughput is 5.19E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.284458e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.172913e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.379827e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.264385e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4531s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4383s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.67E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4192s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4041s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.59E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 81920 
events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7254s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5812s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1439s for 81920 events => throughput is 5.69E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.6806s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5367s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1435s for 81920 events => throughput is 5.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.785167e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.786323e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.877514e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.850142e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034169] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4571s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4340s + [COUNTERS] PROGRAM TOTAL : 0.4275s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4043s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.60E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] CudaCpp HEL ( 3 ) : 
0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.8048s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5784s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2261s for 81920 events => throughput is 3.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7739s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5476s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2259s for 81920 events => throughput is 3.63E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.669033e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.522776e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.709076e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.620788e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034176] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.8678s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8638s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.58E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8511s + [COUNTERS] Fortran Overhead ( 0 ) : 
0.8471s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.57E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268178] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 2.0186s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0088s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 81920 events => throughput is 9.08E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9977s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9877s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 81920 events => throughput is 8.83E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.691011e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.921444e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.112364e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.230318e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.690097e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.714613e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.580203e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.316499e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.646974e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.728492e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.911031e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.598150e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.660586e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.745533e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.671503e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.694862e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 332313b063..0850891597 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,4 +1,4 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx make USEBUILDDIR=1 BACKEND=cuda @@ -7,35 +7,35 @@ make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-15_12:25:51 +DATE: 2024-09-18_13:41:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.8572s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8148s - [COUNTERS] Fortran MEs ( 1 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8368s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7947s + [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.94E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4702s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4279s - [COUNTERS] Fortran MEs ( 1 ) : 0.0423s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4509s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4076s + [COUNTERS] Fortran MEs ( 1 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN 
x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9993s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5780s - [COUNTERS] Fortran MEs ( 1 ) : 0.4213s for 81920 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9677s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5472s + [COUNTERS] Fortran MEs ( 1 ) : 0.4205s for 81920 events => throughput is 1.95E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138606099989779] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4661s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4236s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4454s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4032s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144592707001024] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 2.0011s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5800s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4208s for 81920 events => throughput is 1.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9812s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5583s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4226s for 81920 events => throughput is 1.94E+05 events/s 
[COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.965272e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.959611e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.960936e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.959548e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138602111070696] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4391s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4215s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0173s for 8192 events => throughput is 4.73E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4226s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4048s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0175s for 8192 events => throughput is 4.69E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144588828412729] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7527s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5796s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1729s for 81920 events => throughput is 4.74E+05 events/s + 
[COUNTERS] PROGRAM TOTAL : 1.7203s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5477s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1724s for 81920 events => throughput is 4.75E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.736938e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.738872e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.756905e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.748017e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4356s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4246s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 8192 events => throughput is 7.59E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4166s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 8192 events => throughput is 9.16E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - 
[COUNTERS] PROGRAM TOTAL : 1.6648s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5733s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0913s for 81920 events => throughput is 8.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6374s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5454s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0918s for 81920 events => throughput is 8.93E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.164017e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.120680e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.232442e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.193326e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4302s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4213s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.47E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4176s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4084s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 8192 events => throughput is 9.23E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 81920 events 
across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.6635s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5765s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0868s for 81920 events => throughput is 9.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6345s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5478s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0864s for 81920 events => throughput is 9.48E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.838171e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.788116e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.798371e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.789950e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138606840950104] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4367s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4239s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4189s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4068s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0118s for 8192 events => throughput is 6.95E+05 
events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144591429357156] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7012s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5808s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1202s for 81920 events => throughput is 6.82E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.6732s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5506s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1222s for 81920 events => throughput is 6.70E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.784830e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.765000e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.151143e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.898629e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138612402172164] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.8665s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8626s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.54E+06 
events/s + [COUNTERS] PROGRAM TOTAL : 0.8533s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8496s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596666727985] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 2.0261s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0173s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 81920 events => throughput is 9.98E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9917s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9825s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 81920 events => throughput is 9.59E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.885710e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.139565e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.367920e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.535095e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.566831e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.504949e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.323685e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.302031e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.551074e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.479596e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.318066e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.319419e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** 
Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.342254e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.230042e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.899112e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.720607e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 690f140a41..1cd7f5e3d4 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone + +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-15_12:26:19 +DATE: 2024-09-18_13:42:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.8603s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8180s - [COUNTERS] Fortran MEs ( 1 ) : 0.0423s for 8192 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8437s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8020s + [COUNTERS] Fortran MEs ( 1 ) : 0.0417s for 8192 events => throughput is 1.96E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4773s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4333s - [COUNTERS] Fortran MEs ( 1 ) : 0.0440s for 8192 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4457s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4043s + [COUNTERS] Fortran MEs ( 1 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9957s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5742s - [COUNTERS] Fortran MEs ( 1 ) : 0.4215s for 81920 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9802s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5584s + [COUNTERS] Fortran MEs ( 1 ) : 0.4219s for 81920 events => throughput is 1.94E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613306947967] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4699s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4242s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0453s for 8192 events => throughput is 1.81E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4569s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4109s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0456s for 8192 events => throughput is 1.79E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 
+169,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597573367548] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 2.0415s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5863s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4548s for 81920 events => throughput is 1.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0058s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5525s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4529s for 81920 events => throughput is 1.81E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.842164e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.833075e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.851851e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.846422e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613306947953] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4503s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4251s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0249s for 8192 events => throughput is 3.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4330s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s 
for 8192 events => throughput is 3.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597573367555] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.8185s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5698s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2483s for 81920 events => throughput is 3.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.8037s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5546s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2487s for 81920 events => throughput is 3.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.330961e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.318121e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.355752e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.355189e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4396s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4241s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0152s for 8192 events => throughput is 5.39E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4204s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4041s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0159s for 8192 events => throughput is 5.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7262s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5697s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1562s for 81920 events => throughput is 5.24E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.7037s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5502s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1532s for 81920 events => throughput is 5.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.311307e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.280868e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.353756e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.319511e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4386s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4240s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0143s for 8192 events => throughput is 5.74E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4169s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4022s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7199s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5781s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1415s for 81920 events => throughput is 5.79E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.6742s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5312s + [COUNTERS] 
CudaCpp MEs ( 2 ) : 0.1426s for 81920 events => throughput is 5.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.855269e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.827488e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.000340e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.962674e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4473s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4249s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0220s for 8192 events => throughput is 3.72E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4277s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4052s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.8042s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5822s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.2217s for 81920 events => throughput is 3.69E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.7709s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5477s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2228s for 81920 events => throughput is 3.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.705483e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.662408e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.752040e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.633047e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611963547788] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.8690s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8650s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.54E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.8496s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8458s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } 
[XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232269095] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 2.0284s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0185s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 8.96E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 1.9898s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 8.98E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.739810e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.961867e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.024140e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.402195e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.672565e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.751023e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.446976e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.487612e+07 ) sec^-1 *** EXECUTE 
GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.658819e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.767038e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.897824e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.725223e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.677542e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.748403e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.722978e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.694986e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index b1e24854d0..652edcf84f 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg + make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-15_12:26:48 +DATE: 2024-09-18_13:42:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.7657s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4327s - [COUNTERS] Fortran MEs ( 1 ) : 0.3330s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7493s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4200s + [COUNTERS] Fortran MEs ( 1 ) : 0.3293s for 8192 events => throughput is 2.49E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7371s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4051s - [COUNTERS] Fortran MEs ( 1 ) : 0.3320s for 8192 events => throughput is 2.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7150s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3834s + [COUNTERS] Fortran MEs ( 1 ) : 0.3316s for 8192 events => throughput is 2.47E+04 events/s *** (1) EXECUTE 
MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.2429s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9222s - [COUNTERS] Fortran MEs ( 1 ) : 3.3207s for 81920 events => throughput is 2.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1953s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8899s + [COUNTERS] Fortran MEs ( 1 ) : 3.3054s for 81920 events => throughput is 2.48E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7536s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4055s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3470s for 8192 events => throughput is 2.36E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 0.7369s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3879s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3477s for 8192 events => throughput is 2.36E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.3883s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9232s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4640s for 81920 events => throughput is 2.36E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3596s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8952s + [COUNTERS] 
CudaCpp MEs ( 2 ) : 3.4633s for 81920 events => throughput is 2.37E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.429969e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.471888e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.453551e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.456119e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5849s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4022s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1821s for 8192 events => throughput is 4.50E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.5680s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3869s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1804s for 8192 events => throughput is 4.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279650E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.7388s - [COUNTERS] Fortran Overhead ( 0 ) : 
1.9277s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8104s for 81920 events => throughput is 4.52E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 3.6931s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8866s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8056s for 81920 events => throughput is 4.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.641238e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.653702e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.663272e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.694721e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4942s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4031s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0906s for 8192 events => throughput is 9.05E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4803s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3892s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0905s for 8192 events => throughput is 9.05E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 
: 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.8146s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9105s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9035s for 81920 events => throughput is 9.07E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 2.8119s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9071s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9041s for 81920 events => throughput is 9.06E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.167207e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.368531e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.243572e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.254727e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4829s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4018s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0806s for 8192 events => throughput is 1.02E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4668s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3854s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0809s for 
8192 events => throughput is 1.01E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.7304s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9177s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8121s for 81920 events => throughput is 1.01E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 2.6912s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8823s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8083s for 81920 events => throughput is 1.01E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.039882e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.047750e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.052704e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.045236e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748581E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5198s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4042s - [COUNTERS] CudaCpp MEs 
( 2 ) : 0.1150s for 8192 events => throughput is 7.12E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.5013s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3866s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1140s for 8192 events => throughput is 7.19E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.0660s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9169s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1486s for 81920 events => throughput is 7.13E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 3.0360s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8971s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1383s for 81920 events => throughput is 7.20E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.941251e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.160157e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.958022e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.320569e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.8509s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8384s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 8192 events => throughput is 8.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8352s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8227s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 8192 events => throughput is 8.68E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279636E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.3784s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3496s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0258s for 81920 events => throughput is 3.18E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + [COUNTERS] PROGRAM TOTAL : 2.3543s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3252s + [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0261s for 81920 events => throughput is 3.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.100979e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.134986e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.403743e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.475726e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.183615e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.339604e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.166273e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.161734e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.191972e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.354476e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.170848e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.170951e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.176434e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.318892e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.659019e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.662470e+06 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 985d934955..3362abfbc9 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-15_12:27:32 +DATE: 2024-09-18_13:43:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.7648s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4325s - [COUNTERS] Fortran MEs ( 1 ) : 0.3323s for 8192 events => throughput is 2.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7388s + 
[COUNTERS] Fortran Overhead ( 0 ) : 0.4086s + [COUNTERS] Fortran MEs ( 1 ) : 0.3301s for 8192 events => throughput is 2.48E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7338s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4033s - [COUNTERS] Fortran MEs ( 1 ) : 0.3306s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7145s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3839s + [COUNTERS] Fortran MEs ( 1 ) : 0.3307s for 8192 events => throughput is 2.48E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.2246s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9116s - [COUNTERS] Fortran MEs ( 1 ) : 3.3130s for 81920 events => throughput is 2.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1779s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8749s + [COUNTERS] Fortran MEs ( 1 ) : 3.3030s for 81920 events => throughput is 2.48E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471473453718410E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7384s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4045s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3330s for 8192 events => throughput is 2.46E+04 events/s + 
[COUNTERS] PROGRAM TOTAL : 0.7223s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3896s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3317s for 8192 events => throughput is 2.47E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971643267110940E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.2604s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9235s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.3360s for 81920 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2162s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8886s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.3267s for 81920 events => throughput is 2.46E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.530952e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.539712e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.541703e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.530008e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471459294758378E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5085s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4061s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1020s for 8192 events => throughput is 8.03E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4889s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1015s for 8192 events => throughput is 8.07E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971629726281482E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.9524s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9325s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0195s for 81920 events => throughput is 8.04E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.9019s + [COUNTERS] Fortran Overhead ( 0 ) : 
1.8838s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0176s for 81920 events => throughput is 8.05E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.224767e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.124328e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.232784e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.199350e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4560s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4085s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0472s for 8192 events => throughput is 1.74E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4313s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3842s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0467s for 8192 events => throughput is 1.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.4007s - [COUNTERS] 
Fortran Overhead ( 0 ) : 1.9344s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4660s for 81920 events => throughput is 1.76E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.3507s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8840s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4663s for 81920 events => throughput is 1.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.788351e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.796536e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.804243e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.786454e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4502s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4292s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3860s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 81920 events 
across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.3548s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9286s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4259s for 81920 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3290s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9023s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4263s for 81920 events => throughput is 1.92E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.953582e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.981352e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.958777e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.997761e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471471932611128E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4685s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4128s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0553s for 8192 events => throughput is 1.48E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4486s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3925s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0556s for 8192 events => 
throughput is 1.47E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971639934306102E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.4956s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9320s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5632s for 81920 events => throughput is 1.45E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.4713s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9066s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5643s for 81920 events => throughput is 1.45E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.487964e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.458815e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.471980e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.487461e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471475012321185E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.8512s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8465s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s 
for 8192 events => throughput is 2.26E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8337s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8291s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.31E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971648932322295E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.3665s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3527s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0128s for 81920 events => throughput is 6.42E+06 events/s + [COUNTERS] PROGRAM TOTAL : 2.3421s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3278s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 81920 events => throughput is 6.21E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.643032e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.709678e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.828890e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.936833e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.215885e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.247414e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.209299e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.199841e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.230792e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.195768e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.279914e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.278448e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** 
Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.122919e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.108387e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.244670e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.230857e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index f09398c494..4de53c2d38 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-15_12:28:12 +DATE: 2024-09-18_13:44:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.7588s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4272s - [COUNTERS] Fortran MEs ( 1 ) : 0.3316s for 8192 events => throughput is 2.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7391s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4081s + [COUNTERS] Fortran MEs ( 1 ) : 0.3310s for 8192 events => throughput is 2.48E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7376s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s - [COUNTERS] Fortran MEs ( 1 ) : 0.3336s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3840s + [COUNTERS] Fortran MEs ( 1 ) : 0.3281s for 8192 events => throughput is 2.50E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.2380s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9161s - [COUNTERS] Fortran MEs ( 1 ) : 3.3219s for 81920 events => throughput is 2.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1945s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8920s + [COUNTERS] Fortran MEs ( 1 ) : 3.3025s for 81920 events => throughput is 2.48E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471486590207584E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7582s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4046s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3525s for 8192 events => throughput is 2.32E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7363s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3850s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3501s for 8192 events => throughput is 2.34E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN 
xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971657589635384E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.4849s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9415s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5423s for 81920 events => throughput is 2.31E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [COUNTERS] PROGRAM TOTAL : 5.4502s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8950s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5541s for 81920 events => throughput is 2.30E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.417569e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.405336e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.399198e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.411690e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471486540430027E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5837s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s + [COUNTERS] PROGRAM TOTAL : 0.5672s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s [COUNTERS] CudaCpp MEs ( 2 ) : 0.1790s for 8192 events => throughput is 4.58E+04 events/s - 
[COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971657589963913E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.7197s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9173s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8018s for 81920 events => throughput is 4.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.6943s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8846s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8089s for 81920 events => throughput is 4.53E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.658424e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.686401e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.679651e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.704142e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4953s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4035s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0912s for 8192 events => throughput is 
8.98E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4765s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3867s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0892s for 8192 events => throughput is 9.18E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.8060s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9080s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8973s for 81920 events => throughput is 9.13E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7937s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8977s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8954s for 81920 events => throughput is 9.15E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.369001e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.255440e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.292033e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.362786e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4845s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4048s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0792s for 8192 events => throughput is 1.03E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4681s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3893s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0782s for 8192 events => throughput is 1.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.7172s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9239s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7928s for 81920 events => throughput is 1.03E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 2.6895s + [COUNTERS] Fortran Overhead ( 0 ) : 
1.8933s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7956s for 81920 events => throughput is 1.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.064357e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.053887e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.063711e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.065947e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471486537749241E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5230s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4057s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1167s for 8192 events => throughput is 7.02E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.5056s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1179s for 8192 events => throughput is 6.95E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971657565670345E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.1033s - [COUNTERS] 
Fortran Overhead ( 0 ) : 1.9248s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1778s for 81920 events => throughput is 6.96E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 3.0551s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8834s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1711s for 81920 events => throughput is 7.00E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.025965e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.918098e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.997215e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.013587e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07847 [7.8471485791426987E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.8520s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8395s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0096s for 8192 events => throughput is 8.58E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + [COUNTERS] PROGRAM TOTAL : 0.8394s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8269s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 8192 events => throughput is 8.67E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 81920 
events across 16 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07997 [7.9971656830583548E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.3869s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3578s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0261s for 81920 events => throughput is 3.14E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + [COUNTERS] PROGRAM TOTAL : 2.3560s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3268s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0262s for 81920 events => throughput is 3.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.075089e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.114701e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.284373e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.463889e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.113955e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.291446e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.151597e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 1.155947e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.103058e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.267147e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.161281e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.165743e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.097615e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.239896e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.632131e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.648131e+06 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index aa79e74132..da4192a0d3 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 + +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering 
directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-15_12:28:56 +DATE: 2024-09-18_13:44:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.6228s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3217s - [COUNTERS] Fortran MEs ( 1 ) : 4.3011s for 8192 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6910s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3764s + [COUNTERS] Fortran MEs ( 1 ) : 4.3146s for 8192 events => throughput is 1.90E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.6030s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3119s - [COUNTERS] Fortran MEs ( 1 ) : 4.2911s for 8192 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5801s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2959s + [COUNTERS] Fortran MEs ( 1 ) : 4.2843s for 8192 events => throughput is 1.91E+03 events/s *** (1) EXECUTE 
MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 45.1984s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1366s - [COUNTERS] Fortran MEs ( 1 ) : 43.0618s for 81920 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 45.0141s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0696s + [COUNTERS] Fortran MEs ( 1 ) : 42.9445s for 81920 events => throughput is 1.91E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.7927s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3128s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.4709s for 8192 events => throughput is 1.83E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0091s + [COUNTERS] PROGRAM TOTAL : 4.7546s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3011s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4446s for 8192 events => throughput is 1.84E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0090s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 46.9448s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1256s - [COUNTERS] CudaCpp MEs ( 2 ) : 44.8103s for 81920 events => throughput is 1.83E+03 events/s + [COUNTERS] PROGRAM TOTAL : 46.6278s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0620s + [COUNTERS] 
CudaCpp MEs ( 2 ) : 44.5568s for 81920 events => throughput is 1.84E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.891751e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.897024e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.888267e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.894466e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 2.6844s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3118s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3678s for 8192 events => throughput is 3.46E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s + [COUNTERS] PROGRAM TOTAL : 2.6753s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2991s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3715s for 8192 events => throughput is 3.45E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0047s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248325] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 25.9231s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1299s 
- [COUNTERS] CudaCpp MEs ( 2 ) : 23.7883s for 81920 events => throughput is 3.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 25.8378s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0749s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.7582s for 81920 events => throughput is 3.45E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.557169e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.534769e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.534554e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.572800e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.3455s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3111s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0319s for 8192 events => throughput is 7.94E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s + [COUNTERS] PROGRAM TOTAL : 1.3401s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2956s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0420s for 8192 events => throughput is 7.86E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 
[XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 12.5238s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1221s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.3993s for 81920 events => throughput is 7.88E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s + [COUNTERS] PROGRAM TOTAL : 12.4720s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0723s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.3972s for 81920 events => throughput is 7.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.957412e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.115646e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.987255e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.135568e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.2470s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3177s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9271s for 8192 events => throughput is 8.84E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.2200s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2987s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9191s for 8192 events => throughput is 8.91E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s 
*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 11.3142s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1103s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.2017s for 81920 events => throughput is 8.90E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s + [COUNTERS] PROGRAM TOTAL : 11.2276s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0702s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.1552s for 81920 events => throughput is 8.95E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.281561e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.261139e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.268453e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.284539e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.4928s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3122s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1778s for 8192 events => throughput is 6.96E+03 events/s - [COUNTERS] CudaCpp 
HEL ( 3 ) : 0.0028s + [COUNTERS] PROGRAM TOTAL : 1.4788s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2959s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1801s for 8192 events => throughput is 6.94E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 13.9855s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1285s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8543s for 81920 events => throughput is 6.91E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + [COUNTERS] PROGRAM TOTAL : 13.8631s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0851s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.7752s for 81920 events => throughput is 6.96E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.050033e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.044735e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.018224e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.087685e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.8261s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7522s + [COUNTERS] PROGRAM TOTAL : 0.8126s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7388s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0390s for 8192 events => throughput is 2.10E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0349s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248336] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 2.9166s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5498s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3320s for 81920 events => throughput is 2.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.9095s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5487s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3260s for 81920 events => throughput is 2.51E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s *** 
(3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.134189e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.150288e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.335722e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.340464e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.124137e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.120076e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.168234e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.169270e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.119614e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.124208e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.173416e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 4.169177e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.119318e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.120876e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.423891e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.432039e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 4deca2cf02..d51442efc8 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-15_12:32:48 +DATE: 2024-09-18_13:48:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.6182s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3167s - [COUNTERS] Fortran MEs ( 1 ) : 4.3014s for 8192 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 
4.5851s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2964s + [COUNTERS] Fortran MEs ( 1 ) : 4.2887s for 8192 events => throughput is 1.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.6122s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3142s - [COUNTERS] Fortran MEs ( 1 ) : 4.2980s for 8192 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5902s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2920s + [COUNTERS] Fortran MEs ( 1 ) : 4.2983s for 8192 events => throughput is 1.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 45.2847s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1346s - [COUNTERS] Fortran MEs ( 1 ) : 43.1501s for 81920 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 45.0593s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0904s + [COUNTERS] Fortran MEs ( 1 ) : 42.9689s for 81920 events => throughput is 1.91E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144941544531159] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.6745s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3147s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3511s for 8192 events => throughput is 1.88E+03 events/s - 
[COUNTERS] CudaCpp HEL ( 3 ) : 0.0087s + [COUNTERS] PROGRAM TOTAL : 4.6331s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2951s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3295s for 8192 events => throughput is 1.89E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -170,10 +170,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930329135137288] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 45.6898s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1650s - [COUNTERS] CudaCpp MEs ( 2 ) : 43.5163s for 81920 events => throughput is 1.88E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s + [COUNTERS] PROGRAM TOTAL : 45.4961s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0670s + [COUNTERS] CudaCpp MEs ( 2 ) : 43.4203s for 81920 events => throughput is 1.89E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0087s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -186,12 +186,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.944338e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953905e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.941508e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953638e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -215,10 +215,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144937378275385] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.4963s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3103s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1836s for 8192 events => throughput is 6.92E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s + [COUNTERS] PROGRAM TOTAL : 1.5948s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2947s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2975s for 8192 events => throughput is 6.31E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -250,10 +250,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930324959819654] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 14.1993s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1207s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.0762s for 81920 events => throughput is 6.78E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s + [COUNTERS] PROGRAM TOTAL : 14.0680s + [COUNTERS] Fortran Overhead ( 0 ) : 
2.0718s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9937s for 81920 events => throughput is 6.83E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -266,12 +266,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.996317e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.014424e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.839536e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.021993e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -295,10 +295,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.8442s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3095s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5334s for 8192 events => throughput is 1.54E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s + [COUNTERS] PROGRAM TOTAL : 0.8304s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3000s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5290s for 8192 events => throughput is 1.55E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -330,9 +330,9 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 7.3660s - [COUNTERS] Fortran 
Overhead ( 0 ) : 2.1115s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.2532s for 81920 events => throughput is 1.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.2994s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0696s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.2285s for 81920 events => throughput is 1.57E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -346,12 +346,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.604925e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.599753e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.604030e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.600977e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -375,9 +375,9 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.7886s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3135s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4739s for 8192 events => throughput is 1.73E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7627s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2949s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4666s for 8192 events => throughput is 1.76E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -410,9 +410,9 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 
[0.20930327551379133] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 6.8107s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1081s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.7015s for 81920 events => throughput is 1.74E+04 events/s + [COUNTERS] PROGRAM TOTAL : 6.8208s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0681s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.7514s for 81920 events => throughput is 1.72E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -426,12 +426,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.805124e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.808420e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.796242e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.816362e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -455,10 +455,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144947551388249] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.8988s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3122s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5852s for 8192 events => throughput is 1.40E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + [COUNTERS] PROGRAM TOTAL : 0.8859s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2954s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5889s for 8192 events => throughput is 1.39E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-512z) Compare MADEVENT_CPP 
x1 xsec to MADEVENT_FORTRAN xsec *** @@ -490,10 +490,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930331717025510] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 8.0322s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1227s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.9080s for 81920 events => throughput is 1.39E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 7.9475s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0831s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8629s for 81920 events => throughput is 1.40E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -506,12 +506,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.400554e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.420878e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.422518e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.411954e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -535,9 +535,9 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144955535316123] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.7991s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7473s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0272s for 8192 events => throughput is 3.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7891s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.7375s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 8192 events => throughput is 3.03E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -570,10 +570,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930336562619947] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 2.7864s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5289s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2329s for 81920 events => throughput is 3.52E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0245s + [COUNTERS] PROGRAM TOTAL : 2.8062s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5479s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2338s for 81920 events => throughput is 3.50E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -586,42 +586,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.087446e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.088372e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.366930e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.376508e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.074769e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.114154e+05 ) sec^-1 
*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.213607e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.259362e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.112109e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.087087e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.224381e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.241028e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.063345e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.079549e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.392863e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.391392e+05 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index abdc3e6985..4029a4bd08 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-15_12:35:52 +DATE: 2024-09-18_13:51:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.6491s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3163s - [COUNTERS] Fortran MEs ( 1 ) : 4.3328s for 8192 events => throughput is 1.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5765s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2949s + [COUNTERS] Fortran MEs ( 1 ) : 4.2815s for 8192 events => throughput is 1.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.6308s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3143s - [COUNTERS] Fortran MEs ( 1 ) : 4.3165s for 8192 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6114s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2967s + [COUNTERS] Fortran MEs ( 1 ) : 4.3148s for 8192 events => throughput is 1.90E+03 events/s *** (1) EXECUTE 
MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 45.2197s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1385s - [COUNTERS] Fortran MEs ( 1 ) : 43.0812s for 81920 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 45.1244s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0863s + [COUNTERS] Fortran MEs ( 1 ) : 43.0382s for 81920 events => throughput is 1.90E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786734542164] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.8483s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3147s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.5249s for 8192 events => throughput is 1.81E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0088s + [COUNTERS] PROGRAM TOTAL : 4.8194s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2986s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.5116s for 8192 events => throughput is 1.82E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0093s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930258048084049] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 47.5178s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1281s - [COUNTERS] CudaCpp MEs ( 2 ) : 45.3807s for 81920 events => throughput is 1.81E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0090s + [COUNTERS] PROGRAM TOTAL : 47.3150s + [COUNTERS] Fortran 
Overhead ( 0 ) : 2.0784s + [COUNTERS] CudaCpp MEs ( 2 ) : 45.2275s for 81920 events => throughput is 1.81E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0091s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.872546e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.873631e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.871624e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.874665e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786651655289] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 2.7061s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3125s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3887s for 8192 events => throughput is 3.43E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0049s + [COUNTERS] PROGRAM TOTAL : 2.6800s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2994s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3756s for 8192 events => throughput is 3.45E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930258019984904] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 26.0597s - 
[COUNTERS] Fortran Overhead ( 0 ) : 2.1243s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.9304s for 81920 events => throughput is 3.42E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s + [COUNTERS] PROGRAM TOTAL : 25.8989s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0743s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.8193s for 81920 events => throughput is 3.44E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0054s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.500917e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.519279e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.521414e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.530635e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.3556s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3120s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0411s for 8192 events => throughput is 7.87E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s + [COUNTERS] PROGRAM TOTAL : 1.3351s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2976s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0351s for 8192 events => throughput is 7.91E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 
81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 12.5367s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1235s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.4107s for 81920 events => throughput is 7.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.4644s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0722s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.3898s for 81920 events => throughput is 7.88E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.110548e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.009062e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.032234e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.120880e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.2161s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3129s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9010s for 8192 events => throughput is 9.09E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s + [COUNTERS] PROGRAM TOTAL : 1.1951s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2962s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8968s for 8192 
events => throughput is 9.13E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 11.1682s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1205s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.0456s for 81920 events => throughput is 9.06E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s + [COUNTERS] PROGRAM TOTAL : 11.2498s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0961s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.1516s for 81920 events => throughput is 8.95E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.335833e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.318473e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.254922e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.350512e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.5079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3120s - [COUNTERS] CudaCpp MEs ( 2 ) : 
1.1930s for 8192 events => throughput is 6.87E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s + [COUNTERS] PROGRAM TOTAL : 1.4981s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3004s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1949s for 8192 events => throughput is 6.86E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 14.0414s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1292s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9095s for 81920 events => throughput is 6.88E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + [COUNTERS] PROGRAM TOTAL : 13.9738s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0733s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8977s for 81920 events => throughput is 6.89E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.939493e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.010058e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.983445e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.974396e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.3314 [0.33144786533876569] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.8281s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7547s + [COUNTERS] PROGRAM TOTAL : 0.8114s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7381s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0386s for 8192 events => throughput is 2.12E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [XSECTION] ChannelId = 112 [XSECTION] Cross section = 0.2093 [0.20930258003933860] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 2.9070s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5436s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3289s for 81920 events => throughput is 2.49E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0345s + [COUNTERS] PROGRAM TOTAL : 2.9001s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5282s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3371s for 81920 events => throughput is 2.43E+05 events/s 
+ [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.146362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.156591e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.341729e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.143626e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.115354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.122372e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.164001e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.161172e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.119722e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154782e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.169184e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.164268e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.126511e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.117598e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.426812e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.420328e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 22c56c63cf..b5fe53dcd6 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg make USEBUILDDIR=1 BACKEND=cuda + + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-09-15_12:41:22 +DATE: 2024-09-18_13:57:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 102.2091s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5630s - [COUNTERS] Fortran MEs ( 1 ) : 101.6461s for 8192 events => throughput is 8.06E+01 events/s + 
[COUNTERS] PROGRAM TOTAL : 100.9942s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5376s + [COUNTERS] Fortran MEs ( 1 ) : 100.4566s for 8192 events => throughput is 8.15E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 102.3065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5557s - [COUNTERS] Fortran MEs ( 1 ) : 101.7508s for 8192 events => throughput is 8.05E+01 events/s + [COUNTERS] PROGRAM TOTAL : 100.8250s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5356s + [COUNTERS] Fortran MEs ( 1 ) : 100.2893s for 8192 events => throughput is 8.17E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1015.5197s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6072s - [COUNTERS] Fortran MEs ( 1 ) : 1010.9125s for 81920 events => throughput is 8.10E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1009.3485s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5474s + [COUNTERS] Fortran MEs ( 1 ) : 1004.8011s for 81920 events => throughput is 8.15E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 121.0029s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5478s - [COUNTERS] CudaCpp MEs ( 2 ) : 
120.2529s for 8192 events => throughput is 6.81E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2022s + [COUNTERS] PROGRAM TOTAL : 122.6272s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5269s + [COUNTERS] CudaCpp MEs ( 2 ) : 121.8976s for 8192 events => throughput is 6.72E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2027s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633775E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1237.7089s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5156s - [COUNTERS] CudaCpp MEs ( 2 ) : 1232.9854s for 81920 events => throughput is 6.64E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2079s + [COUNTERS] PROGRAM TOTAL : 1215.7257s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3426s + [COUNTERS] CudaCpp MEs ( 2 ) : 1211.1771s for 81920 events => throughput is 6.76E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2060s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.635196e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.947835e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.680081e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.953028e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 62.7563s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5430s - [COUNTERS] CudaCpp MEs ( 2 ) : 62.1127s for 8192 events => throughput is 1.32E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1006s + [COUNTERS] PROGRAM TOTAL : 65.0412s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5159s + [COUNTERS] CudaCpp MEs ( 2 ) : 64.4246s for 8192 events => throughput is 1.27E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1007s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 620.1444s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5224s - [COUNTERS] CudaCpp MEs ( 2 ) : 615.5209s for 81920 events => throughput is 1.33E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1011s + [COUNTERS] PROGRAM TOTAL : 643.1942s + [COUNTERS] Fortran 
Overhead ( 0 ) : 4.3363s + [COUNTERS] CudaCpp MEs ( 2 ) : 638.7557s for 81920 events => throughput is 1.28E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1021s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.588919e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.582676e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.592321e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.328359e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 29.4160s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5466s - [COUNTERS] CudaCpp MEs ( 2 ) : 28.8206s for 8192 events => throughput is 2.84E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0487s + [COUNTERS] PROGRAM TOTAL : 28.5049s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5111s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.9480s for 8192 events => throughput is 2.93E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0458s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM 
TOTAL : 297.3512s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5469s - [COUNTERS] CudaCpp MEs ( 2 ) : 292.7572s for 81920 events => throughput is 2.80E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0471s + [COUNTERS] PROGRAM TOTAL : 284.0441s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3313s + [COUNTERS] CudaCpp MEs ( 2 ) : 279.6676s for 81920 events => throughput is 2.93E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0452s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.275338e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.544656e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.265860e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.429932e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 27.1464s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9932s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.1100s for 8192 events => throughput is 3.14E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0432s + [COUNTERS] PROGRAM TOTAL : 25.4242s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5110s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.8732s for 8192 events => throughput is 3.29E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0401s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ 
-409,10 +409,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 264.6969s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5790s - [COUNTERS] CudaCpp MEs ( 2 ) : 260.0758s for 81920 events => throughput is 3.15E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0421s + [COUNTERS] PROGRAM TOTAL : 258.4274s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3572s + [COUNTERS] CudaCpp MEs ( 2 ) : 254.0306s for 81920 events => throughput is 3.22E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0397s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.866714e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.924332e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.924843e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.943882e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 26.5115s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9718s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.4924s for 8192 events => throughput is 3.21E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0472s + [COUNTERS] PROGRAM TOTAL : 
26.1506s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5251s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.5777s for 8192 events => throughput is 3.20E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0477s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 259.7252s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4963s - [COUNTERS] CudaCpp MEs ( 2 ) : 255.1820s for 81920 events => throughput is 3.21E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0469s + [COUNTERS] PROGRAM TOTAL : 258.8965s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4685s + [COUNTERS] CudaCpp MEs ( 2 ) : 254.3821s for 81920 events => throughput is 3.22E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0458s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.450525e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.459682e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.456130e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.450518e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 3.2899s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0907s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1026s for 8192 events => throughput is 7.43E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 1.0966s + [COUNTERS] PROGRAM TOTAL : 3.2103s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0334s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1011s for 8192 events => throughput is 7.44E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0758s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633791E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 16.9895s - [COUNTERS] Fortran Overhead ( 0 ) : 5.0379s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.8621s for 81920 events => throughput is 7.54E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 1.0895s + [COUNTERS] PROGRAM TOTAL : 16.9165s + [COUNTERS] Fortran 
Overhead ( 0 ) : 4.9531s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.8819s for 81920 events => throughput is 7.53E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0816s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.484119e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.508582e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.258297e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.240924e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.228572e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.270483e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.575774e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.589702e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.255424e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.287293e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 
11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.453521e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.424280e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.234529e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.273685e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.253800e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.243085e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index ecfc5d7b2f..2a956cd657 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-09-15_13:59:30 +DATE: 2024-09-18_15:14:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 101.5343s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5565s - [COUNTERS] Fortran MEs ( 1 ) : 100.9778s for 8192 events => throughput is 8.11E+01 events/s + 
[COUNTERS] PROGRAM TOTAL : 100.8383s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5370s + [COUNTERS] Fortran MEs ( 1 ) : 100.3014s for 8192 events => throughput is 8.17E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 101.8885s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5530s - [COUNTERS] Fortran MEs ( 1 ) : 101.3354s for 8192 events => throughput is 8.08E+01 events/s + [COUNTERS] PROGRAM TOTAL : 100.6800s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5304s + [COUNTERS] Fortran MEs ( 1 ) : 100.1496s for 8192 events => throughput is 8.18E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1019.5275s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6107s - [COUNTERS] Fortran MEs ( 1 ) : 1014.9167s for 81920 events => throughput is 8.07E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1006.5135s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5331s + [COUNTERS] Fortran MEs ( 1 ) : 1001.9804s for 81920 events => throughput is 8.18E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -135,10 +135,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.358e-07 [2.3575849446922190E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 113.9851s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5716s - [COUNTERS] CudaCpp MEs ( 2 ) : 
113.2257s for 8192 events => throughput is 7.24E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1878s + [COUNTERS] PROGRAM TOTAL : 113.7634s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5261s + [COUNTERS] CudaCpp MEs ( 2 ) : 113.0501s for 8192 events => throughput is 7.25E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1871s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -171,10 +171,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.285e-07 [2.2845954405861011E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1127.1766s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5299s - [COUNTERS] CudaCpp MEs ( 2 ) : 1122.4594s for 81920 events => throughput is 7.30E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1874s + [COUNTERS] PROGRAM TOTAL : 1135.0851s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4478s + [COUNTERS] CudaCpp MEs ( 2 ) : 1130.4514s for 81920 events => throughput is 7.25E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1858s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -187,12 +187,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.606058e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.611057e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.616772e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.618948e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -217,10 +217,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.358e-07 [2.3575845178322101E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 28.4233s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5609s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.8152s for 8192 events => throughput is 2.95E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0471s + [COUNTERS] PROGRAM TOTAL : 28.2816s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5245s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.7095s for 8192 events => throughput is 2.96E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0476s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -253,10 +253,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.285e-07 [2.2845949484525033E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 283.7955s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5114s - [COUNTERS] CudaCpp MEs ( 2 ) : 279.2380s for 81920 events => throughput is 2.93E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0461s + [COUNTERS] PROGRAM TOTAL : 283.1165s + [COUNTERS] Fortran 
Overhead ( 0 ) : 4.4533s + [COUNTERS] CudaCpp MEs ( 2 ) : 278.6156s for 81920 events => throughput is 2.94E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0477s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -269,12 +269,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.393625e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.409666e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.397500e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.406819e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -299,10 +299,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 14.8014s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5481s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.2298s for 8192 events => throughput is 5.76E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0235s + [COUNTERS] PROGRAM TOTAL : 14.8123s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5276s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.2607s for 8192 events => throughput is 5.74E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0240s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -335,10 +335,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM 
TOTAL : 148.1207s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4972s - [COUNTERS] CudaCpp MEs ( 2 ) : 143.5998s for 81920 events => throughput is 5.70E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0237s + [COUNTERS] PROGRAM TOTAL : 146.8484s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4408s + [COUNTERS] CudaCpp MEs ( 2 ) : 142.3838s for 81920 events => throughput is 5.75E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0238s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -351,12 +351,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.850524e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.847317e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.880069e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.876249e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -381,9 +381,9 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 13.6509s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5589s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.0709s for 8192 events => throughput is 6.27E+02 events/s + [COUNTERS] PROGRAM TOTAL : 13.2590s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5224s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.7153s for 8192 events => throughput is 6.44E+02 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0212s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -417,10 +417,10 @@ DEBUG: MEK processed 81920 
events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 135.4503s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5206s - [COUNTERS] CudaCpp MEs ( 2 ) : 130.9090s for 81920 events => throughput is 6.26E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0207s + [COUNTERS] PROGRAM TOTAL : 130.9182s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4397s + [COUNTERS] CudaCpp MEs ( 2 ) : 126.4573s for 81920 events => throughput is 6.48E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0212s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -433,12 +433,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.634471e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.748756e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.589322e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.729474e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -463,10 +463,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.358e-07 [2.3575850859831750E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 13.3326s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5808s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.7254s for 8192 events => throughput is 6.44E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0265s + [COUNTERS] PROGRAM TOTAL : 13.3128s + [COUNTERS] Fortran Overhead ( 0 ) : 
0.5317s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.7560s for 8192 events => throughput is 6.42E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0251s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -499,10 +499,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.285e-07 [2.2845946568145136E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 131.7116s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5046s - [COUNTERS] CudaCpp MEs ( 2 ) : 127.1838s for 81920 events => throughput is 6.44E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0233s + [COUNTERS] PROGRAM TOTAL : 131.9849s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4720s + [COUNTERS] CudaCpp MEs ( 2 ) : 127.4891s for 81920 events => throughput is 6.43E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0238s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -515,12 +515,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.895916e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.915808e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.878269e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.934421e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -544,10 +544,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.358e-07 [2.3575862304433055E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 2.1535s 
- [COUNTERS] Fortran Overhead ( 0 ) : 1.0557s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5385s for 8192 events => throughput is 1.52E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.5593s + [COUNTERS] PROGRAM TOTAL : 2.1536s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0553s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5409s for 8192 events => throughput is 1.51E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5574s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -579,10 +579,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.285e-07 [2.2845959888250639E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 10.9212s - [COUNTERS] Fortran Overhead ( 0 ) : 5.0072s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.3407s for 81920 events => throughput is 1.53E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.5732s + [COUNTERS] PROGRAM TOTAL : 10.9466s + [COUNTERS] Fortran Overhead ( 0 ) : 5.0484s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.3690s for 81920 events => throughput is 1.53E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5292s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -595,42 +595,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.542523e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.538905e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.546779e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.534050e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.146798e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.137147e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.138507e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.187870e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.102140e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.144301e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.148371e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.161388e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 
--bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.144042e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.156097e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.992297e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.971114e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index f052f133a7..e04ca3f869 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make USEBUILDDIR=1 BACKEND=cpp512z +make USEBUILDDIR=1 
BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-09-15_14:59:11 +DATE: 2024-09-18_16:14:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 101.7559s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5531s - [COUNTERS] Fortran MEs ( 1 ) : 101.2028s for 8192 events => throughput is 8.09E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.0730s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5330s + [COUNTERS] Fortran MEs ( 1 ) : 100.5400s for 8192 events => throughput is 8.15E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ 
Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 102.2220s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5595s - [COUNTERS] Fortran MEs ( 1 ) : 101.6626s for 8192 events => throughput is 8.06E+01 events/s + [COUNTERS] PROGRAM TOTAL : 100.8963s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5347s + [COUNTERS] Fortran MEs ( 1 ) : 100.3617s for 8192 events => throughput is 8.16E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1016.2987s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6111s - [COUNTERS] Fortran MEs ( 1 ) : 1011.6876s for 81920 events => throughput is 8.10E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1008.5494s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5481s + [COUNTERS] Fortran MEs ( 1 ) : 1004.0013s for 81920 events => throughput is 8.16E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561678995975E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 121.8737s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5730s - [COUNTERS] CudaCpp MEs ( 2 ) : 121.0897s for 8192 events => throughput is 6.77E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2110s + [COUNTERS] PROGRAM TOTAL : 119.7272s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5266s + [COUNTERS] CudaCpp MEs ( 2 ) : 118.9906s for 8192 events => throughput is 6.88E+01 
events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2100s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713238614534E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1278.2990s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5229s - [COUNTERS] CudaCpp MEs ( 2 ) : 1273.5682s for 81920 events => throughput is 6.43E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2078s + [COUNTERS] PROGRAM TOTAL : 1235.8333s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4485s + [COUNTERS] CudaCpp MEs ( 2 ) : 1231.1755s for 81920 events => throughput is 6.65E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2092s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.297738e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.603593e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.358504e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.563954e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561701257335E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 67.0430s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5843s - [COUNTERS] CudaCpp MEs ( 2 ) : 66.3507s 
for 8192 events => throughput is 1.23E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1079s + [COUNTERS] PROGRAM TOTAL : 64.0094s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5236s + [COUNTERS] CudaCpp MEs ( 2 ) : 63.3817s for 8192 events => throughput is 1.29E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1041s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713242471448E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 651.0804s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5257s - [COUNTERS] CudaCpp MEs ( 2 ) : 646.4467s for 81920 events => throughput is 1.27E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1080s + [COUNTERS] PROGRAM TOTAL : 636.9448s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4347s + [COUNTERS] CudaCpp MEs ( 2 ) : 632.4070s for 81920 events => throughput is 1.30E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1031s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.537162e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.548168e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.537881e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.548990e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 28.5211s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5700s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.9053s for 8192 events => throughput is 2.94E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0458s + [COUNTERS] PROGRAM TOTAL : 28.2427s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5259s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.6712s for 8192 events => throughput is 2.96E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0456s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 283.4125s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4921s - [COUNTERS] CudaCpp MEs ( 2 ) : 278.8756s for 81920 events => throughput is 2.94E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0448s + [COUNTERS] PROGRAM TOTAL : 285.3174s + [COUNTERS] Fortran 
Overhead ( 0 ) : 4.4390s + [COUNTERS] CudaCpp MEs ( 2 ) : 280.8328s for 81920 events => throughput is 2.92E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0456s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.574406e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.559056e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.562894e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.558709e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 24.8698s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5658s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.2646s for 8192 events => throughput is 3.38E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0395s + [COUNTERS] PROGRAM TOTAL : 25.3100s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5275s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.7433s for 8192 events => throughput is 3.31E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0391s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM 
TOTAL : 248.6338s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5381s - [COUNTERS] CudaCpp MEs ( 2 ) : 244.0566s for 81920 events => throughput is 3.36E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0391s + [COUNTERS] PROGRAM TOTAL : 250.0775s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4353s + [COUNTERS] CudaCpp MEs ( 2 ) : 245.6035s for 81920 events => throughput is 3.34E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0387s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.158059e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.143810e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.143650e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.119192e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 26.3376s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5632s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.7273s for 8192 events => throughput is 3.18E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0471s + [COUNTERS] PROGRAM TOTAL : 25.7695s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5251s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.1974s for 8192 events => throughput is 3.25E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0470s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ 
-489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 261.5729s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5211s - [COUNTERS] CudaCpp MEs ( 2 ) : 257.0034s for 81920 events => throughput is 3.19E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0484s + [COUNTERS] PROGRAM TOTAL : 262.3166s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4708s + [COUNTERS] CudaCpp MEs ( 2 ) : 257.7997s for 81920 events => throughput is 3.18E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0461s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.487285e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.519965e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.483613e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.518227e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.357e-07 [2.3572561518129465E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 2.8854s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1263s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8776s for 8192 events => throughput is 9.33E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.8815s + [COUNTERS] PROGRAM TOTAL : 2.7761s 
+ [COUNTERS] Fortran Overhead ( 0 ) : 1.0249s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8762s for 8192 events => throughput is 9.35E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8750s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.284e-07 [2.2842713109538129E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 14.5688s - [COUNTERS] Fortran Overhead ( 0 ) : 5.0388s - [COUNTERS] CudaCpp MEs ( 2 ) : 8.6484s for 81920 events => throughput is 9.47E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.8816s + [COUNTERS] PROGRAM TOTAL : 14.4588s + [COUNTERS] Fortran Overhead ( 0 ) : 4.9583s + [COUNTERS] CudaCpp MEs ( 2 ) : 8.6324s for 81920 events => throughput is 9.49E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8681s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.426842e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.423002e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.075881e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.078690e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.106694e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.104813e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.154223e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.152942e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.105665e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106947e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108609e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110409e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 
--bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.106895e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106917e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.678273e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.676393e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index ae3de12b01..13fa996bcb 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-15_12:39:45 +DATE: 2024-09-18_13:55:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.5503s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4781s - [COUNTERS] Fortran MEs ( 1 ) : 0.0722s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5290s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4573s + [COUNTERS] Fortran MEs ( 1 ) : 0.0716s for 8192 events => throughput is 1.14E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4980s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4263s - [COUNTERS] Fortran MEs ( 1 ) : 0.0717s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4789s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4070s + [COUNTERS] Fortran MEs ( 1 ) : 0.0719s for 8192 events => throughput is 1.14E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.6426s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9227s - [COUNTERS] Fortran MEs ( 1 ) : 0.7199s for 81920 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5996s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8818s + [COUNTERS] Fortran MEs ( 1 ) : 0.7178s for 81920 events => throughput is 1.14E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737132] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.5037s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4265s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0766s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4885s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4104s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 8192 events => throughput is 1.06E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ 
-169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.7326s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9466s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7853s for 81920 events => throughput is 1.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6882s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9084s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7791s for 81920 events => throughput is 1.05E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.069256e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.061513e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.071686e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.071810e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737170] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4715s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4278s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4501s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4063s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0432s for 8192 events => throughput 
is 1.90E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427590] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.3706s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9391s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4311s for 81920 events => throughput is 1.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3263s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8964s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4293s for 81920 events => throughput is 1.91E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.920806e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.907415e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.914858e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.918851e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4519s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4266s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0249s for 8192 events => throughput is 3.29E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 0.4325s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.33E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1855s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9345s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2505s for 81920 events => throughput is 3.27E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.1464s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8971s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2488s for 81920 events => throughput is 3.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.315064e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.325644e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.353486e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.338006e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4550s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4315s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 8192 events => throughput is 3.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4312s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0217s for 8192 events => throughput is 3.78E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1824s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9556s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2263s for 81920 events => throughput is 3.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1356s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9099s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2252s for 81920 events => throughput is 3.64E+05 events/s 
[COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.736761e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.464520e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.761611e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.541284e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4645s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4291s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0349s for 8192 events => throughput is 2.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4489s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4137s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0347s for 8192 events => throughput is 2.36E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.2885s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9460s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3420s for 81920 events => throughput is 2.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2441s + 
[COUNTERS] Fortran Overhead ( 0 ) : 1.9069s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3367s for 81920 events => throughput is 2.43E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.383703e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.393978e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.334002e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.414185e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737173] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.8655s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8611s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8542s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8498s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.55E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.3721s - [COUNTERS] Fortran Overhead ( 0 ) : 
2.3607s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0103s for 81920 events => throughput is 7.97E+06 events/s + [COUNTERS] PROGRAM TOTAL : 2.3403s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3289s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0103s for 81920 events => throughput is 7.96E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.721561e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.881632e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.065390e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.254031e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.156601e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.277303e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.991698e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.101959e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.159535e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.239939e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.254663e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.286389e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.182535e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.254121e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.650805e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.640364e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index c9aade28c7..0c2abc603a 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu + make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-15_12:40:18 +DATE: 2024-09-18_13:56:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.5486s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4755s - [COUNTERS] Fortran MEs ( 1 ) : 0.0730s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5308s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4584s + [COUNTERS] Fortran MEs ( 1 ) : 0.0725s for 8192 events => throughput is 1.13E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4998s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4285s - [COUNTERS] Fortran MEs ( 1 ) : 0.0713s for 8192 events => throughput is 1.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4871s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4149s + [COUNTERS] Fortran MEs ( 1 ) : 0.0721s for 8192 events => throughput is 1.14E+05 events/s *** (1) EXECUTE 
MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.6509s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9299s - [COUNTERS] Fortran MEs ( 1 ) : 0.7210s for 81920 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6180s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8993s + [COUNTERS] Fortran MEs ( 1 ) : 0.7187s for 81920 events => throughput is 1.14E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313506133732837] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.5035s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4289s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0741s for 8192 events => throughput is 1.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4837s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4096s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0735s for 8192 events => throughput is 1.11E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842907143103] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.6726s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9382s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7339s for 81920 events => throughput is 1.12E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 2.6552s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9225s + [COUNTERS] CudaCpp MEs ( 2 ) : 
0.7321s for 81920 events => throughput is 1.12E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.135662e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.127783e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.136549e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.135487e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313502997679400] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4543s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4272s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 8192 events => throughput is 3.06E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4362s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4092s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 8192 events => throughput is 3.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095839656505114] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.2055s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9374s + [COUNTERS] PROGRAM TOTAL 
: 2.1752s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9071s [COUNTERS] CudaCpp MEs ( 2 ) : 0.2678s for 81920 events => throughput is 3.06E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.039729e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.031782e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.985614e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.033396e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4424s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4288s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4208s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4072s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.15E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 
2.0714s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9365s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1346s for 81920 events => throughput is 6.08E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0338s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8997s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1338s for 81920 events => throughput is 6.12E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.180640e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.165970e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.307549e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.197191e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4399s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4274s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4217s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4091s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.65E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] 
Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.0732s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9485s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1245s for 81920 events => throughput is 6.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0322s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9087s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1232s for 81920 events => throughput is 6.65E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.792178e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.618658e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.753332e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.353403e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313505300145301] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4441s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4268s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0170s for 8192 events => throughput is 4.83E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4238s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4063s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) 
Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842133012335] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1153s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9447s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1702s for 81920 events => throughput is 4.81E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.0846s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9123s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1719s for 81920 events => throughput is 4.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.847714e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.731547e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.878988e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.814682e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313508590887899] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.8653s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8613s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.60E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + 
[COUNTERS] PROGRAM TOTAL : 0.8504s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8464s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095846337765808] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.3785s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3688s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 81920 events => throughput is 9.25E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 2.3852s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3752s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 8.98E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.041241e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.088649e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.251569e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.406235e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.842862e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.833815e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.410741e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.147443e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.813185e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.839780e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.550006e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.591152e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** 
Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.522713e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.570167e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.198390e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.189550e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index e0b5569f21..d3b173c725 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-15_12:40:49 +DATE: 2024-09-18_13:56:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.5511s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4774s - [COUNTERS] Fortran MEs ( 1 ) : 0.0737s for 8192 events => throughput is 1.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5250s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4531s + [COUNTERS] Fortran MEs ( 1 ) : 0.0719s for 8192 events => throughput is 1.14E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.5034s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4306s - [COUNTERS] Fortran MEs ( 1 ) : 0.0728s for 8192 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4812s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s + [COUNTERS] Fortran MEs ( 1 ) : 0.0722s for 8192 events => throughput is 1.13E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.6463s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9275s - [COUNTERS] Fortran MEs ( 1 ) : 0.7189s for 81920 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6071s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8898s + [COUNTERS] Fortran MEs ( 1 ) : 0.7173s for 81920 events => throughput is 1.14E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504495344831] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.5152s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4367s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 8192 events => throughput is 1.06E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 0.4867s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4094s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0766s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP 
x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.7101s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9336s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7758s for 81920 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6750s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9006s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7737s for 81920 events => throughput is 1.06E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.061169e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.066197e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.068944e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.081547e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504495344833] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4707s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4276s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0426s for 8192 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4594s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4139s + [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0450s for 8192 events => throughput is 1.82E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.3778s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9488s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4286s for 81920 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3287s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9003s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4278s for 81920 events => throughput is 1.91E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.907975e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.915335e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.932977e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.909065e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4572s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4311s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0257s for 8192 events => throughput is 
3.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4419s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4163s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 8192 events => throughput is 3.26E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1817s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9356s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2456s for 81920 events => throughput is 3.34E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1482s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9012s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2465s for 81920 events => throughput is 3.32E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.338992e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.313762e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.300070e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.370922e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4505s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4278s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0223s for 8192 events => throughput is 3.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4314s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4086s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0223s for 8192 events => throughput is 3.67E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1685s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9487s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2194s for 81920 events => throughput is 3.73E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.1188s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9005s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2179s for 81920 
events => throughput is 3.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.788360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.747505e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.844368e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.784395e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4630s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4272s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0352s for 8192 events => throughput is 2.33E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4497s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4136s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0355s for 8192 events => throughput is 2.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.2959s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9441s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3513s for 
81920 events => throughput is 2.33E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 2.2638s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9140s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3492s for 81920 events => throughput is 2.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.305023e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.324769e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.367510e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.345529e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2031 [0.20313504512110778] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.8715s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8671s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.47E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8545s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8502s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.211 [0.21095842873460982] fbridge_mode=1 
[UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.3800s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3686s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0103s for 81920 events => throughput is 7.99E+06 events/s + [COUNTERS] PROGRAM TOTAL : 2.3685s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3565s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 81920 events => throughput is 7.58E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.726178e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.938068e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.175370e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.128844e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.180169e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.264635e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.148445e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.049713e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.160013e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.259722e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.247186e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.250826e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.158935e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.279255e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.649343e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.647946e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index a0e790e59c..fad5d1a64f 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-15_16:18:04 +DATE: 2024-09-18_17:32:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 1.0034s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9555s - [COUNTERS] Fortran MEs ( 1 ) : 0.0479s for 8192 events => throughput is 1.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9760s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9274s + [COUNTERS] Fortran MEs ( 1 ) : 0.0485s for 8192 events => throughput is 1.69E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4837s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4346s - [COUNTERS] Fortran MEs ( 1 ) : 0.0490s for 8192 events => throughput is 1.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4561s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4081s + [COUNTERS] Fortran MEs ( 1 ) : 0.0480s for 8192 events => throughput is 1.71E+05 events/s *** (1) EXECUTE 
MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0766s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5991s - [COUNTERS] Fortran MEs ( 1 ) : 0.4774s for 81920 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0599s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5836s + [COUNTERS] Fortran MEs ( 1 ) : 0.4763s for 81920 events => throughput is 1.72E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755170] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4969s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4447s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0517s for 8192 events => throughput is 1.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4589s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4083s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0501s for 8192 events => throughput is 1.64E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.1070s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5943s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5123s for 81920 events => throughput is 1.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0776s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5685s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5087s for 81920 events => throughput is 
1.61E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.652048e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.656917e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.659409e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.653176e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4727s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4425s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0298s for 8192 events => throughput is 2.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4352s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4071s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0277s for 8192 events => throughput is 2.96E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.8851s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6055s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2793s for 81920 events => throughput is 2.93E+05 events/s + [COUNTERS] PROGRAM 
TOTAL : 1.8492s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5718s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2771s for 81920 events => throughput is 2.96E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.998650e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.911436e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.029895e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.988061e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4534s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4359s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4258s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4084s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0170s for 8192 events => throughput is 4.81E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.7663s - [COUNTERS] Fortran 
Overhead ( 0 ) : 1.5987s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1673s for 81920 events => throughput is 4.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7395s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5731s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1660s for 81920 events => throughput is 4.93E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.936700e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.731125e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.961392e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.942521e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4506s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4348s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4230s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4072s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.31E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 
[2.0336713375865476] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.7471s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5932s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1536s for 81920 events => throughput is 5.33E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.7429s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5869s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1556s for 81920 events => throughput is 5.26E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.479225e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.403364e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.515474e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.439647e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755179] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4603s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4365s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4355s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4114s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.46E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 
xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.8294s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5963s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2327s for 81920 events => throughput is 3.52E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8196s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5847s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2345s for 81920 events => throughput is 3.49E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.498096e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.483959e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.645446e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.537527e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,8 +534,8 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755192] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.8754s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8716s + [COUNTERS] PROGRAM TOTAL : 0.8564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8526s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 
0.0008s @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865294] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0505s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0405s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 81920 events => throughput is 8.85E+06 events/s + [COUNTERS] PROGRAM TOTAL : 2.0272s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0173s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 9.01E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.883515e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.829708e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.305682e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.382767e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.805842e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.774663e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.100377e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 7.124992e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.801291e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.755835e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.430840e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.430950e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.808875e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.756916e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.522017e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.513302e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index e348b5e95d..4984f73b96 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx + make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-15_16:18:33 +DATE: 2024-09-18_17:32:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9786s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9307s - [COUNTERS] Fortran MEs ( 1 ) : 0.0479s for 8192 events => throughput is 1.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9597s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9125s + [COUNTERS] Fortran MEs ( 1 ) : 0.0473s for 8192 events => throughput is 1.73E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4748s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4272s - [COUNTERS] Fortran MEs ( 1 ) : 0.0476s for 8192 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4554s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4080s + [COUNTERS] Fortran MEs ( 1 ) : 0.0474s for 8192 events => throughput is 1.73E+05 events/s *** (1) EXECUTE 
MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0898s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6127s - [COUNTERS] Fortran MEs ( 1 ) : 0.4771s for 81920 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0424s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5675s + [COUNTERS] Fortran MEs ( 1 ) : 0.4749s for 81920 events => throughput is 1.72E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160406825242951] fbridge_mode=1 [UNWEIGHT] Wrote 1653 events (found 1658 events) - [COUNTERS] PROGRAM TOTAL : 0.4982s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4507s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0471s for 8192 events => throughput is 1.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4547s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0470s for 8192 events => throughput is 1.74E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -145,7 +145,7 @@ OK! xsec from fortran (2.0160081479755183) and cpp (2.0160406825242951) differ b *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! 
-diff /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 +diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 7562,7575d7561 < 4 1 1E-03 0.1250010E+03 0.7546771E-02 0.1235066E+00 < 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.71320499473E+02 0.71320499473E+02 0.00000000000E+00 0. 1. diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index aee293444e..e45c8953e0 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-15_16:18:40 +DATE: 2024-09-18_17:32:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9822s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9342s - [COUNTERS] Fortran MEs ( 1 ) : 0.0480s for 8192 events => throughput is 1.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9574s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9096s + [COUNTERS] Fortran MEs ( 1 ) : 0.0478s for 8192 events => throughput is 1.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' 
./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4747s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4267s - [COUNTERS] Fortran MEs ( 1 ) : 0.0480s for 8192 events => throughput is 1.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4551s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4079s + [COUNTERS] Fortran MEs ( 1 ) : 0.0472s for 8192 events => throughput is 1.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0803s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6006s - [COUNTERS] Fortran MEs ( 1 ) : 0.4796s for 81920 events => throughput is 1.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0377s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5636s + [COUNTERS] Fortran MEs ( 1 ) : 0.4741s for 81920 events => throughput is 1.73E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -135,9 +135,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081964453331] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4941s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4424s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0513s for 8192 events => throughput is 1.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4555s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0511s for 8192 events => throughput is 1.60E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN 
xsec *** @@ -171,9 +171,9 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713843200420] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0975s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5888s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5083s for 81920 events => throughput is 1.61E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0631s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5575s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5052s for 81920 events => throughput is 1.62E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -189,13 +189,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.547214e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.544125e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.555047e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.558149e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -220,9 +220,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081964453336] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4608s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4324s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0280s for 8192 events => throughput is 2.92E+05 events/s + [COUNTERS] 
PROGRAM TOTAL : 0.4342s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4061s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0277s for 8192 events => throughput is 2.95E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -256,10 +256,10 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713843200425] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.8697s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5908s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2785s for 81920 events => throughput is 2.94E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.8495s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5723s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2768s for 81920 events => throughput is 2.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -274,13 +274,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.808635e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.837690e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.832759e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.886878e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -305,10 +305,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId 
= 1 [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4521s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4343s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0175s for 8192 events => throughput is 4.69E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4268s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0174s for 8192 events => throughput is 4.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -341,9 +341,9 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.7695s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5966s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1725s for 81920 events => throughput is 4.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7266s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5577s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1685s for 81920 events => throughput is 4.86E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -359,13 +359,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.778867e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.779100e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.765875e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.788127e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -390,9 +390,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4553s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4389s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4240s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0156s for 8192 events => throughput is 5.24E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -426,9 +426,9 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.7602s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6033s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1565s for 81920 events => throughput is 5.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7202s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5626s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1572s for 81920 events => throughput is 5.21E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -444,13 +444,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.160486e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.210846e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.247106e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.166322e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -475,9 +475,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081962970020] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4605s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4361s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0241s for 8192 events => throughput is 3.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4371s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4123s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.35E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -511,9 +511,9 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713836598515] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.8377s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5959s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2414s for 81920 events => throughput is 3.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8082s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5683s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2395s for 81920 events => throughput is 3.42E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -529,13 +529,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The 
following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.114993e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.192275e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.218557e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.206869e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -559,9 +559,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081483021330] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.8691s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8652s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8528s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8489s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -594,9 +594,9 @@ DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.034 [2.0336713380111449] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0649s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0550s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 81920 events => throughput is 8.92E+06 events/s + [COUNTERS] PROGRAM TOTAL : 2.0087s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9990s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 81920 
events => throughput is 9.13E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -610,42 +610,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.889005e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.939022e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.160378e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.244768e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.798951e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.749489e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.069218e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.094535e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.788436e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.768020e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.498432e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.372316e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.792905e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.761406e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.500899e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.486762e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 3944248170..cf925a09c6 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx + make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' 
- - make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-15_16:21:57 +DATE: 2024-09-18_17:35:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.7837s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4036s - [COUNTERS] Fortran MEs ( 1 ) : 2.3801s for 
8192 events => throughput is 3.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6643s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3661s + [COUNTERS] Fortran MEs ( 1 ) : 2.2982s for 8192 events => throughput is 3.56E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.7457s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3886s - [COUNTERS] Fortran MEs ( 1 ) : 2.3571s for 8192 events => throughput is 3.48E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6579s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3588s + [COUNTERS] Fortran MEs ( 1 ) : 2.2991s for 8192 events => throughput is 3.56E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 25.3649s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1183s - [COUNTERS] Fortran MEs ( 1 ) : 23.2466s for 81920 events => throughput is 3.52E+03 events/s + [COUNTERS] PROGRAM TOTAL : 25.0583s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0507s + [COUNTERS] Fortran MEs ( 1 ) : 23.0076s for 81920 events => throughput is 3.56E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.8935s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3895s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 2.4988s for 8192 events => throughput is 3.28E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s + [COUNTERS] PROGRAM TOTAL : 2.8350s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3611s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4687s for 8192 events => throughput is 3.32E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0053s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 27.0952s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1181s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.9720s for 81920 events => throughput is 3.28E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s + [COUNTERS] PROGRAM TOTAL : 26.8636s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0489s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.8095s for 81920 events => throughput is 3.30E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.400030e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.460402e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.433414e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.457448e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728610E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.7089s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3967s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.3094s for 8192 events => throughput is 6.26E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6570s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3616s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2927s for 8192 events => throughput is 6.34E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898191E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 15.2155s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1129s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.0999s for 81920 events => throughput is 6.25E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + [COUNTERS] PROGRAM TOTAL : 15.0457s + [COUNTERS] Fortran 
Overhead ( 0 ) : 2.0488s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.9941s for 81920 events => throughput is 6.30E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.519678e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.568599e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.485759e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.542585e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9814s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3900s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5898s for 8192 events => throughput is 1.39E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9401s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3632s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5754s for 8192 events => throughput is 1.42E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL 
: 7.9019s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1015s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7989s for 81920 events => throughput is 1.41E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.7938s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0478s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.7444s for 81920 events => throughput is 1.43E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.452031e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.465958e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.449039e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.451297e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9097s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3947s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5136s for 8192 events => throughput is 1.60E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8791s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3648s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5129s for 8192 events => throughput is 1.60E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 81920 events across 
72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 7.2541s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0919s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.1608s for 81920 events => throughput is 1.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.1685s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0408s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.1262s for 81920 events => throughput is 1.60E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.652584e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.660633e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.662622e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.663487e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.0779s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3969s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6792s for 8192 events => throughput is 1.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0357s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3623s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6717s for 8192 events 
=> throughput is 1.22E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 8.8889s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0984s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.7887s for 81920 events => throughput is 1.21E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.7351s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0383s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.6950s for 81920 events => throughput is 1.22E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.222969e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.242378e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.228238e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.242909e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728578E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8588s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8192s - [COUNTERS] CudaCpp MEs 
( 2 ) : 0.0198s for 8192 events => throughput is 4.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s + [COUNTERS] PROGRAM TOTAL : 0.8480s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.13E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0197s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.7854s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5891s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1766s for 81920 events => throughput is 4.64E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s + [COUNTERS] PROGRAM TOTAL : 2.6736s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4774s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1765s for 81920 events => throughput is 4.64E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0197s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.196050e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.229187e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.528954e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.527847e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.664076e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.819324e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.233050e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.226919e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.853541e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.844216e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.202405e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.225190e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.857146e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.847840e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.686465e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.681732e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index e9deddff77..d625debf72 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-15_16:24:19 +DATE: 2024-09-18_17:38:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.7015s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3761s - [COUNTERS] Fortran MEs ( 1 ) : 2.3254s for 
8192 events => throughput is 3.52E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6507s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3559s + [COUNTERS] Fortran MEs ( 1 ) : 2.2948s for 8192 events => throughput is 3.57E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.7015s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3807s - [COUNTERS] Fortran MEs ( 1 ) : 2.3208s for 8192 events => throughput is 3.53E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6503s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3594s + [COUNTERS] Fortran MEs ( 1 ) : 2.2909s for 8192 events => throughput is 3.58E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 25.2760s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1081s - [COUNTERS] Fortran MEs ( 1 ) : 23.1679s for 81920 events => throughput is 3.54E+03 events/s + [COUNTERS] PROGRAM TOTAL : 25.0293s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0436s + [COUNTERS] Fortran MEs ( 1 ) : 22.9857s for 81920 events => throughput is 3.56E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381686438954397E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.8693s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3947s - 
[COUNTERS] CudaCpp MEs ( 2 ) : 2.4694s for 8192 events => throughput is 3.32E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s + [COUNTERS] PROGRAM TOTAL : 2.7985s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3626s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4310s for 8192 events => throughput is 3.37E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0049s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542978900095690E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 26.6707s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1091s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.5567s for 81920 events => throughput is 3.34E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.3775s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0419s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.3307s for 81920 events => throughput is 3.37E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.460698e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.486852e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.449222e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.494086e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381671483253128E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.1130s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4027s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7087s for 8192 events => throughput is 1.16E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0412s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3621s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6777s for 8192 events => throughput is 1.21E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542962735029303E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 9.0988s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1256s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.9716s for 81920 events => throughput is 1.18E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [COUNTERS] PROGRAM TOTAL : 8.8470s + [COUNTERS] Fortran 
Overhead ( 0 ) : 2.0545s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.7910s for 81920 events => throughput is 1.21E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.222480e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.236746e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.223984e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.228135e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6864s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3911s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2945s for 8192 events => throughput is 2.78E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 0.6595s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3662s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2926s for 8192 events => throughput is 2.80E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events 
(found 1684 events) - [COUNTERS] PROGRAM TOTAL : 5.0576s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0983s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.9584s for 81920 events => throughput is 2.77E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 5.0070s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0356s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.9705s for 81920 events => throughput is 2.76E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.831089e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.852598e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.778084e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.877017e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6632s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3942s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2682s for 8192 events => throughput is 3.05E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6241s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3601s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2632s for 8192 events => throughput is 3.11E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512y) Compare MADEVENT_CPP x1 xsec 
to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 4.7861s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0881s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.6971s for 81920 events => throughput is 3.04E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.6833s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0329s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.6497s for 81920 events => throughput is 3.09E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.155559e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.188563e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.172155e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.189121e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381686320975603E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7427s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4008s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3409s for 8192 events => throughput is 2.40E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + 
[COUNTERS] PROGRAM TOTAL : 0.6974s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3592s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3373s for 8192 events => throughput is 2.43E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6543004237976207E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 5.5358s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1001s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4347s for 81920 events => throughput is 2.39E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.4012s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0331s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.3672s for 81920 events => throughput is 2.43E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.419635e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.455496e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.430312e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.453904e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381711031958629E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8612s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8242s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.13E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0172s + [COUNTERS] PROGRAM TOTAL : 0.8395s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8026s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0171s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6543026921346333E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.7207s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5435s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1599s for 81920 events => throughput is 5.12E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0172s + [COUNTERS] 
PROGRAM TOTAL : 2.6550s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4784s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1594s for 81920 events => throughput is 5.14E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0171s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.203327e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.221158e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.444470e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.431078e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.300793e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.300814e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.322450e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.323922e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 
1.296148e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.295837e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.323252e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.322906e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.292944e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.292673e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.656200e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.656202e+05 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 2926bfb0ab..e6874f3a32 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-15_16:26:16 +DATE: 2024-09-18_17:40:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.7071s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3810s - [COUNTERS] Fortran MEs ( 1 ) : 2.3260s for 8192 events => throughput is 3.52E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6497s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3549s + [COUNTERS] Fortran MEs ( 1 ) : 2.2948s for 8192 events => throughput is 3.57E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.6931s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3811s - [COUNTERS] Fortran MEs ( 1 ) : 2.3120s for 8192 events => throughput is 3.54E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6518s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3572s + [COUNTERS] Fortran MEs ( 1 ) : 2.2945s for 8192 events => throughput 
is 3.57E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 25.2391s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1065s - [COUNTERS] Fortran MEs ( 1 ) : 23.1326s for 81920 events => throughput is 3.54E+03 events/s + [COUNTERS] PROGRAM TOTAL : 25.0481s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0523s + [COUNTERS] Fortran MEs ( 1 ) : 22.9958s for 81920 events => throughput is 3.56E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381608764955655E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.9243s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3965s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.5226s for 8192 events => throughput is 3.25E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s + [COUNTERS] PROGRAM TOTAL : 2.8581s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3622s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4908s for 8192 events => throughput is 3.29E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542925018181681E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 27.2773s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1266s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.1455s for 81920 events => throughput is 3.26E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s + 
[COUNTERS] PROGRAM TOTAL : 27.0498s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0486s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.9962s for 81920 events => throughput is 3.28E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.405812e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.436817e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.414011e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.435512e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381608686521600E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.6740s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3954s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2759s for 8192 events => throughput is 6.42E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + [COUNTERS] PROGRAM TOTAL : 1.6335s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3586s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2723s for 8192 events => throughput is 6.44E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 
[7.6542924921991264E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 14.9334s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1011s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.8296s for 81920 events => throughput is 6.39E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.8859s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0643s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.8189s for 81920 events => throughput is 6.39E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.728250e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.796385e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.779541e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.784638e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9679s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3921s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5743s for 8192 events => throughput is 1.43E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9347s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3638s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5694s for 8192 events => throughput is 1.44E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** 
(2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 7.8646s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1024s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7607s for 81920 events => throughput is 1.42E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 7.7335s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0485s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.6834s for 81920 events => throughput is 1.44E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.464043e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.478402e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.467432e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.467295e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8994s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3907s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5073s for 8192 events => 
throughput is 1.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8664s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3632s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5018s for 8192 events => throughput is 1.63E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 7.1742s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0971s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.0757s for 81920 events => throughput is 1.61E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.0416s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0429s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.9973s for 81920 events => throughput is 1.64E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.629585e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.693290e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.667414e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.684165e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.0767s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3890s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6858s for 8192 events => throughput is 1.19E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [COUNTERS] PROGRAM TOTAL : 1.0424s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3606s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6800s for 8192 events => throughput is 1.20E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0019s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 8.9698s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0925s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.8755s for 81920 events => throughput is 1.19E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [COUNTERS] PROGRAM 
TOTAL : 8.8421s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0543s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.7861s for 81920 events => throughput is 1.21E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.207955e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.218603e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.210184e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.233808e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.638e-07 [7.6381610372590318E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8633s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8236s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8393s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7998s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0197s for 8192 events => throughput is 4.15E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 7.654e-07 [7.6542926581386226E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 
1684 events) - [COUNTERS] PROGRAM TOTAL : 2.7183s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5211s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1774s for 81920 events => throughput is 4.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6799s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4834s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1767s for 81920 events => throughput is 4.64E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.181769e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.207155e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.501330e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.529302e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.823476e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.824963e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.163740e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.207520e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.836976e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.824989e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.201797e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.199605e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.682707e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.829686e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.670380e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.672241e+05 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 9ce56a2c20..a3ffe665a4 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-15_16:20:35 +DATE: 2024-09-18_17:34:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.7270s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7185s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6936s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6849s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.41E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4436s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4349s - [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4210s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4125s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.64E+05 
events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6937s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6091s - [COUNTERS] Fortran MEs ( 1 ) : 0.0846s for 81920 events => throughput is 9.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6464s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5624s + [COUNTERS] Fortran MEs ( 1 ) : 0.0840s for 81920 events => throughput is 9.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4569s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4481s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4282s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4193s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.61E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.7096s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6246s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0847s for 81920 events => throughput is 9.68E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.6471s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5643s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0824s for 81920 events => throughput is 9.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.003482e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.967649e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.010577e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.004982e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4412s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4364s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.84E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4170s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4121s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.80E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6661s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6195s - [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0463s for 81920 events => throughput is 1.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6254s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5798s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0452s for 81920 events => throughput is 1.81E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.895351e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.903119e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.970667e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.966510e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4516s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4482s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4171s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.84E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] 
fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6424s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6128s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0292s for 81920 events => throughput is 2.80E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.5995s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5705s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0287s for 81920 events => throughput is 2.85E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.076046e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.156830e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.350533e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.269475e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4486s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4454s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4170s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4140s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.00E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 
0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6351s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6077s + [COUNTERS] PROGRAM TOTAL : 1.5963s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5689s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 81920 events => throughput is 3.03E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.308950e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.272274e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.410024e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.518220e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4531s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4495s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4193s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4158s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6424s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6109s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0311s for 81920 events => throughput is 2.64E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6175s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5862s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0309s for 81920 events => throughput is 2.65E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.879828e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.808476e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.100504e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.104718e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426109] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.8776s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8739s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s 
for 8192 events => throughput is 2.62E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.8589s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8551s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.55E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 2.0680s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0594s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 81920 events => throughput is 1.02E+07 events/s + [COUNTERS] PROGRAM TOTAL : 2.0146s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0062s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 81920 events => throughput is 1.04E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.097292e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.114496e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.375155e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.411767e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.411895e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.418677e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.863144e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.644247e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.479845e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.424249e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.815670e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.758689e+08 ) sec^-1 
*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.473337e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.384808e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.228599e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.177573e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index dbd145c063..6af3b55835 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 + +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-15_16:21:03 +DATE: 2024-09-18_17:34:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.7082s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6996s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput 
is 9.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6834s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6749s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.63E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4406s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4322s - [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4230s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4146s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.72E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6930s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6080s - [COUNTERS] Fortran MEs ( 1 ) : 0.0850s for 81920 events => throughput is 9.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6556s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5716s + [COUNTERS] Fortran MEs ( 1 ) : 0.0840s for 81920 events => throughput is 9.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449446496609361] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4545s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4459s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 
events => throughput is 9.81E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4261s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4173s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.65E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747305007079218] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6930s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6098s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0829s for 81920 events => throughput is 9.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6758s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5913s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0842s for 81920 events => throughput is 9.73E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.007261e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.013895e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.017605e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.014072e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449446369440458] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4435s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4405s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 2.99E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4170s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4141s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.96E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747304961041555] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6362s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6083s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0277s for 81920 events => throughput is 2.96E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6056s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5782s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0272s for 81920 events => throughput is 
3.01E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.284018e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.210079e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.432203e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.272367e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4386s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4364s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.13E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4185s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4165s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.66E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6465s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6266s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0197s for 81920 events => throughput is 4.16E+06 events/s + 
[COUNTERS] PROGRAM TOTAL : 1.6070s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5879s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0188s for 81920 events => throughput is 4.35E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.063003e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.874017e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.292038e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.300612e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4510s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4489s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4142s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4122s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.54E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM 
TOTAL : 1.6259s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6077s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0179s for 81920 events => throughput is 4.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.5999s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5818s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0179s for 81920 events => throughput is 4.59E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.244642e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.302533e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.614839e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.611044e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449447031649013] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4486s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4460s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4176s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4150s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.63E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] 
ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747305508949557] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6554s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6328s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0223s for 81920 events => throughput is 3.67E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6057s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5839s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 81920 events => throughput is 3.82E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.412735e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.359914e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.638041e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.606033e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449447352014630] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.8784s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8748s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.68E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8553s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8518s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.72E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) 
Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747305761315818] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 2.0625s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0542s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 81920 events => throughput is 1.05E+07 events/s + [COUNTERS] PROGRAM TOTAL : 2.0187s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0105s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 81920 events => throughput is 1.06E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.154441e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.132925e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.353273e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.463748e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.331367e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.452376e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.058204e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.074682e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.463227e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.432547e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.109516e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.996097e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.155108e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.096714e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.640171e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.649902e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 2e41ca2cbf..d3c2ed78ae 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-15_16:21:30 +DATE: 2024-09-18_17:35:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.7195s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7107s - [COUNTERS] Fortran MEs ( 1 ) : 0.0088s for 8192 events => throughput is 9.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6928s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6841s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.39E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4498s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4410s - [COUNTERS] Fortran MEs ( 1 ) : 0.0088s for 8192 events => throughput is 9.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4220s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4135s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.58E+05 
events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.7106s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6251s - [COUNTERS] Fortran MEs ( 1 ) : 0.0855s for 81920 events => throughput is 9.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6593s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5746s + [COUNTERS] Fortran MEs ( 1 ) : 0.0847s for 81920 events => throughput is 9.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4646s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4554s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 8192 events => throughput is 9.31E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4261s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4172s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,10 +169,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.7066s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6200s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0862s for 81920 events => throughput is 9.50E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.6599s + 
[COUNTERS] Fortran Overhead ( 0 ) : 1.5747s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0848s for 81920 events => throughput is 9.66E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.753908e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.812593e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.000196e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.833045e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4546s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4497s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4216s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4168s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.83E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6465s - [COUNTERS] Fortran 
Overhead ( 0 ) : 1.6016s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0447s for 81920 events => throughput is 1.83E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6345s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5889s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0453s for 81920 events => throughput is 1.81E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.842568e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934567e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.939712e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.973267e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4475s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4441s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.71E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4198s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4167s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross 
section = 0.3075 [0.30747311619894635] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6359s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6071s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0284s for 81920 events => throughput is 2.88E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.6044s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5763s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 81920 events => throughput is 2.95E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.319543e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.181731e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.469317e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.488610e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4549s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4518s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 2.99E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4215s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4184s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.97E+06 events/s 
+ [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,10 +409,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6340s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6069s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 81920 events => throughput is 3.06E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.6145s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5872s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0269s for 81920 events => throughput is 3.04E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.335525e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.215852e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.608974e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.572337e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4504s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4468s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => 
throughput is 2.52E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4167s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.60E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6561s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6249s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0307s for 81920 events => throughput is 2.67E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6110s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5806s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0301s for 81920 events => throughput is 2.73E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.036868e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.871798e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.195892e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.069203e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452360186230] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.8785s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8749s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.8557s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8521s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310720557364] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 2.0629s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0544s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 81920 events => throughput is 1.02E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 2.0158s + [COUNTERS] Fortran Overhead ( 0 
) : 2.0075s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 81920 events => throughput is 1.05E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.079478e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.203370e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.384918e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.488049e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.482107e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.465715e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.823345e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.776763e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.501214e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.434433e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = 
SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.930940e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.877036e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.488831e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.445366e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.231647e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.175182e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 62e2554b8b..e14403d083 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-15_16:19:09 +DATE: 2024-09-18_17:33:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,8 +58,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.8824s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8399s + [COUNTERS] PROGRAM TOTAL : 0.8478s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8053s [COUNTERS] Fortran MEs ( 1 ) 
: 0.0425s for 8192 events => throughput is 1.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4660s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4237s - [COUNTERS] Fortran MEs ( 1 ) : 0.0423s for 8192 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4470s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4043s + [COUNTERS] Fortran MEs ( 1 ) : 0.0427s for 8192 events => throughput is 1.92E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9963s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5726s - [COUNTERS] Fortran MEs ( 1 ) : 0.4237s for 81920 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9546s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5320s + [COUNTERS] Fortran MEs ( 1 ) : 0.4226s for 81920 events => throughput is 1.94E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846964] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4815s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4345s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0465s for 8192 events => throughput is 1.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4114s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0447s 
for 8192 events => throughput is 1.83E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444664] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 2.0315s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5822s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4490s for 81920 events => throughput is 1.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9966s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5483s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4479s for 81920 events => throughput is 1.83E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.850829e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.854967e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.847409e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.848854e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4690s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4433s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 8192 events => throughput 
is 3.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4357s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4103s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 8192 events => throughput is 3.26E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.8264s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5731s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2530s for 81920 events => throughput is 3.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8082s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5526s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2552s for 81920 events => throughput is 3.21E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.295503e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.238674e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.383060e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.323356e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4583s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4424s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0156s for 8192 events => throughput is 5.25E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4251s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4091s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.22E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7403s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5823s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1576s for 81920 events => throughput is 5.20E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.6875s + [COUNTERS] Fortran Overhead ( 0 ) : 
1.5321s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1550s for 81920 events => throughput is 5.28E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.346054e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.205465e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.315504e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.341963e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4533s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4383s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0146s for 8192 events => throughput is 5.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4231s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4089s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.90E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7255s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5809s - [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.1443s for 81920 events => throughput is 5.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6853s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5431s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1419s for 81920 events => throughput is 5.77E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.745425e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.704465e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.928976e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.807181e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4775s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0236s for 8192 events => throughput is 3.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4336s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4101s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.55E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 
[UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.8170s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5877s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2290s for 81920 events => throughput is 3.58E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.7690s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5437s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2250s for 81920 events => throughput is 3.64E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.544976e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.577527e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.602571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.629127e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8820s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8781s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8492s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8454s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.63E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN 
xsec *** @@ -569,10 +569,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 2.0328s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0229s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 9.01E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 1.9952s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9855s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 81920 events => throughput is 9.15E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.708123e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.949285e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.136288e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.317105e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.859628e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.829932e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 7.597476e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.548750e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.838367e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.793745e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.970647e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.913836e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.822341e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.818636e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.671790e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.654381e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 5ea9a274c7..a972218890 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' - -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-15_16:19:38 +DATE: 2024-09-18_17:33:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.8639s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8212s - [COUNTERS] Fortran MEs ( 1 ) : 0.0426s for 8192 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8346s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7928s + [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4685s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4259s - [COUNTERS] Fortran MEs ( 1 ) : 0.0425s for 8192 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4474s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4056s + [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s *** (1) EXECUTE 
MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9998s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5755s - [COUNTERS] Fortran MEs ( 1 ) : 0.4243s for 81920 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9534s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5332s + [COUNTERS] Fortran MEs ( 1 ) : 0.4202s for 81920 events => throughput is 1.95E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641906072918047] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4862s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0421s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4536s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4103s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0430s for 8192 events => throughput is 1.91E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473258789404959] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 2.0026s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5791s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4232s for 81920 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9508s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5325s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4180s for 81920 events => throughput is 
1.96E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.962000e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.983462e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.971836e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.977072e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,9 +214,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641902189470080] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4548s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4371s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0174s for 8192 events => throughput is 4.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4272s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4098s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.77E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,9 +249,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473255074265531] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7464s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5743s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1719s for 81920 events => throughput is 4.77E+05 events/s + [COUNTERS] 
PROGRAM TOTAL : 1.7014s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5300s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1712s for 81920 events => throughput is 4.79E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.761360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.686204e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.758225e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.729824e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4564s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4469s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 8192 events => throughput is 8.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4171s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 8192 events => throughput is 9.21E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,9 +329,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6633s - 
[COUNTERS] Fortran Overhead ( 0 ) : 1.5720s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0911s for 81920 events => throughput is 8.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6247s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5346s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0899s for 81920 events => throughput is 9.11E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.095324e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.731978e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.260486e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.141403e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,9 +374,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4530s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4440s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4189s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4099s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 8192 events => throughput is 9.27E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] 
Cross section = 44.47 [44.473254628666531] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6712s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5843s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0866s for 81920 events => throughput is 9.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6246s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5387s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0857s for 81920 events => throughput is 9.56E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.845166e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.618487e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.869743e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.810592e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641906399820272] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4622s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4498s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0121s for 8192 events => throughput is 6.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4241s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4113s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.53E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to 
MADEVENT_FORTRAN xsec *** @@ -489,10 +489,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473258854390501] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7035s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5822s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1211s for 81920 events => throughput is 6.77E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6706s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5504s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1199s for 81920 events => throughput is 6.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.814469e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.869027e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.887986e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.899392e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641910992291372] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8659s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8622s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8527s + [COUNTERS] Fortran Overhead ( 
0 ) : 0.8491s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473262664842089] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 2.0160s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0074s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 81920 events => throughput is 1.02E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9894s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9808s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 81920 events => throughput is 1.03E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.873489e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.991468e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.285658e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.344514e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.913803e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.881682e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** 
Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.363404e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.350971e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.939202e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.826185e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.391621e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.350870e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.636477e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.507679e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.005101e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.018982e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index a80fb58d5a..f3cbf0c54f 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-15_16:20:06 +DATE: 2024-09-18_17:34:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.8574s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8153s - [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8326s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7908s + [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4665s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4245s - [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4514s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4081s + [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE 
MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 2.0185s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5937s - [COUNTERS] Fortran MEs ( 1 ) : 0.4248s for 81920 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9646s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5417s + [COUNTERS] Fortran MEs ( 1 ) : 0.4230s for 81920 events => throughput is 1.94E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4844s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4381s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0458s for 8192 events => throughput is 1.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4612s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4163s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -169,9 +169,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 2.0381s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5844s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4534s for 81920 events => throughput is 1.81E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9868s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5362s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4502s for 81920 events => throughput is 
1.82E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.855837e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.844411e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.855798e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.856447e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4621s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4365s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0252s for 8192 events => throughput is 3.25E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4369s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4119s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 8192 events => throughput is 3.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.8351s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5852s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2495s for 81920 events 
=> throughput is 3.28E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.7808s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5352s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2452s for 81920 events => throughput is 3.34E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.330553e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.286947e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.338265e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.353817e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4575s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4416s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0156s for 8192 events => throughput is 5.26E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4259s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4101s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section 
= 44.47 [44.473265889684782] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7359s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5815s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1540s for 81920 events => throughput is 5.32E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.6932s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5410s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1518s for 81920 events => throughput is 5.39E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.295263e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.355089e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.300898e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.376897e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4549s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4403s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.76E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4277s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0141s for 8192 events => throughput is 5.80E+05 events/s + [COUNTERS] 
CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -409,9 +409,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7287s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5853s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1430s for 81920 events => throughput is 5.73E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6947s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5520s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1424s for 81920 events => throughput is 5.75E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.880539e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.878311e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.012183e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.966522e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,9 +454,9 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4599s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4371s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0224s for 8192 events => throughput is 3.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4311s + 
[COUNTERS] Fortran Overhead ( 0 ) : 0.4086s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -489,9 +489,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.8085s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2211s for 81920 events => throughput is 3.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7576s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5385s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2187s for 81920 events => throughput is 3.74E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.675547e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.691521e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.724298e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.773666e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,10 +534,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911674225568] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8739s - [COUNTERS] Fortran Overhead 
( 0 ) : 0.8700s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 0.8481s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8441s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.54E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -569,9 +569,9 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264587763374] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 2.0197s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0099s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 9.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9812s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9714s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 81920 events => throughput is 9.08E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.935715e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.958191e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.223055e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.401140e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.837385e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.815576e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.518211e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.499893e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.850328e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.820308e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.872009e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.845220e+07 ) sec^-1 *** EXECUTE 
GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.860604e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.813891e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.713800e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.729165e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** From 1b67e654dd1f2f4e7f1cf880d69428bf929d9510 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 18 Sep 2024 19:47:30 +0200 Subject: [PATCH 59/76] [amd] rerun 96 tput builds and tests on LUMI worker node (small-g 72h) - all as expected STARTED AT Wed 18 Sep 2024 03:07:46 PM EEST ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean -nocuda ENDED(1) AT Wed 18 Sep 2024 05:21:56 PM EEST [Status=2] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean -nocuda ENDED(2) AT Wed 18 Sep 2024 06:00:57 PM EEST [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean -nocuda ENDED(3) AT Wed 18 Sep 2024 06:09:10 PM EEST [Status=2] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst -nocuda ENDED(4) AT Wed 18 Sep 2024 06:11:01 PM EEST [Status=0] SKIP './tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common -nocuda' ENDED(5) AT Wed 18 Sep 2024 06:11:01 PM EEST [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common -nocuda ENDED(6) AT Wed 18 Sep 2024 06:12:50 PM EEST [Status=0] ./tput/teeThroughputX.sh -mix -hrd -makej -susyggtt -susyggt1t1 -smeftggtttt -heftggbb -makeclean -nocuda ENDED(7) AT Wed 18 Sep 2024 07:30:15 PM 
EEST [Status=0] ./tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt:ERROR! C++ calculation (C++/GPU) failed ./tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt:ERROR! C++ calculation (C++/GPU) failed ./tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt:ERROR! C++ calculation (C++/GPU) failed ./tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt:ERROR! 
C++ calculation (C++/GPU) failed ./tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt:ERROR! C++ calculation (C++/GPU) failed ./tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt:ERROR! C++ calculation (C++/GPU) failed ./tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt:ERROR! C++ calculation (C++/GPU) failed ./tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt:/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault ./tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt:ERROR! 
C++ calculation (C++/GPU) failed --- .../log_eemumu_mad_d_inl0_hrd0.txt | 258 +++++--------- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 276 ++++++--------- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 244 +++++-------- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 261 +++++--------- .../log_eemumu_mad_d_inl0_hrd1.txt | 254 +++++--------- .../log_eemumu_mad_d_inl1_hrd0.txt | 258 +++++--------- .../log_eemumu_mad_d_inl1_hrd1.txt | 258 +++++--------- .../log_eemumu_mad_f_inl0_hrd0.txt | 268 ++++++--------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 284 ++++++--------- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 254 +++++--------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 269 ++++++--------- .../log_eemumu_mad_f_inl0_hrd1.txt | 268 ++++++--------- .../log_eemumu_mad_f_inl1_hrd0.txt | 268 ++++++--------- .../log_eemumu_mad_f_inl1_hrd1.txt | 268 ++++++--------- .../log_eemumu_mad_m_inl0_hrd0.txt | 254 +++++--------- .../log_eemumu_mad_m_inl0_hrd1.txt | 254 +++++--------- .../log_ggtt_mad_d_inl0_hrd0.txt | 254 +++++--------- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 272 ++++++--------- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 240 +++++-------- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 257 +++++--------- .../log_ggtt_mad_d_inl0_hrd1.txt | 254 +++++--------- .../log_ggtt_mad_d_inl1_hrd0.txt | 254 +++++--------- .../log_ggtt_mad_d_inl1_hrd1.txt | 254 +++++--------- .../log_ggtt_mad_f_inl0_hrd0.txt | 272 ++++++--------- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 290 ++++++---------- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 264 ++++++-------- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 275 ++++++--------- .../log_ggtt_mad_f_inl0_hrd1.txt | 272 ++++++--------- .../log_ggtt_mad_f_inl1_hrd0.txt | 272 ++++++--------- .../log_ggtt_mad_f_inl1_hrd1.txt | 272 ++++++--------- .../log_ggtt_mad_m_inl0_hrd0.txt | 258 +++++--------- .../log_ggtt_mad_m_inl0_hrd1.txt | 258 +++++--------- .../log_ggttg_mad_d_inl0_hrd0.txt | 293 ++++++---------- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 315 
+++++++---------- .../log_ggttg_mad_d_inl0_hrd1.txt | 293 ++++++---------- .../log_ggttg_mad_f_inl0_hrd0.txt | 301 +++++++--------- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 323 +++++++----------- .../log_ggttg_mad_f_inl0_hrd1.txt | 301 +++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 281 ++++++--------- .../log_ggttg_mad_m_inl0_hrd1.txt | 281 ++++++--------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 285 ++++++---------- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 307 +++++++---------- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 269 ++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 290 ++++++---------- .../log_ggttgg_mad_d_inl0_hrd1.txt | 285 ++++++---------- .../log_ggttgg_mad_d_inl1_hrd0.txt | 289 ++++++---------- .../log_ggttgg_mad_d_inl1_hrd1.txt | 293 ++++++---------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 301 +++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 323 +++++++----------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 295 ++++++---------- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 306 +++++++---------- .../log_ggttgg_mad_f_inl0_hrd1.txt | 299 +++++++--------- .../log_ggttgg_mad_f_inl1_hrd0.txt | 297 +++++++--------- .../log_ggttgg_mad_f_inl1_hrd1.txt | 297 +++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 281 ++++++--------- .../log_ggttgg_mad_m_inl0_hrd1.txt | 281 ++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 237 ++++--------- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 261 ++++---------- .../log_ggttggg_mad_d_inl0_hrd1.txt | 237 ++++--------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 251 ++++---------- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 275 +++++---------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 251 ++++---------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 233 ++++--------- .../log_ggttggg_mad_m_inl0_hrd1.txt | 233 ++++--------- .../log_gqttq_mad_d_inl0_hrd0.txt | 269 ++------------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 289 ++-------------- .../log_gqttq_mad_d_inl0_hrd1.txt | 269 ++------------- 
.../log_gqttq_mad_f_inl0_hrd0.txt | 269 ++------------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 289 ++-------------- .../log_gqttq_mad_f_inl0_hrd1.txt | 269 ++------------- .../log_gqttq_mad_m_inl0_hrd0.txt | 269 ++------------- .../log_gqttq_mad_m_inl0_hrd1.txt | 269 ++------------- .../log_heftggbb_mad_d_inl0_hrd0.txt | 254 +++++--------- .../log_heftggbb_mad_d_inl0_hrd1.txt | 254 +++++--------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 270 ++++++--------- .../log_heftggbb_mad_f_inl0_hrd1.txt | 272 ++++++--------- .../log_heftggbb_mad_m_inl0_hrd0.txt | 252 +++++--------- .../log_heftggbb_mad_m_inl0_hrd1.txt | 252 +++++--------- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 281 ++++++--------- .../log_smeftggtttt_mad_d_inl0_hrd1.txt | 281 ++++++--------- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 301 +++++++--------- .../log_smeftggtttt_mad_f_inl0_hrd1.txt | 301 +++++++--------- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 281 ++++++--------- .../log_smeftggtttt_mad_m_inl0_hrd1.txt | 281 ++++++--------- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 250 +++++--------- .../log_susyggt1t1_mad_d_inl0_hrd1.txt | 250 +++++--------- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 262 ++++++-------- .../log_susyggt1t1_mad_f_inl0_hrd1.txt | 262 ++++++-------- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 254 +++++--------- .../log_susyggt1t1_mad_m_inl0_hrd1.txt | 254 +++++--------- .../log_susyggtt_mad_d_inl0_hrd0.txt | 258 +++++--------- .../log_susyggtt_mad_d_inl0_hrd1.txt | 254 +++++--------- .../log_susyggtt_mad_f_inl0_hrd0.txt | 270 ++++++--------- .../log_susyggtt_mad_f_inl0_hrd1.txt | 270 ++++++--------- .../log_susyggtt_mad_m_inl0_hrd0.txt | 254 +++++--------- .../log_susyggtt_mad_m_inl0_hrd1.txt | 254 +++++--------- 96 files changed, 8886 insertions(+), 17156 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 95eb3e309d..2360ada0f2 100644 --- 
a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_17:11:40 -DATE: 2024-09-18_12:08:39 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.586175e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.543752e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.774580e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.722428 sec -INFO: No Floating Point Exceptions have been reported - 2,618,484,542 cycles # 2.848 GHz - 4,056,431,697 instructions # 1.55 insn per cycle - 1.017935073 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.191952e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.831109e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.948248e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.637744 sec +INFO: No Floating Point Exceptions have been reported + 1,376,729,510 cycles:u # 1.761 GHz (75.68%) + 2,773,495 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.25%) + 7,573,940 stalled-cycles-backend:u # 0.55% backend cycles idle (73.51%) + 2,354,068,706 instructions:u # 1.71 insn per cycle + # 0.00 stalled cycles per insn (73.83%) + 0.966210404 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.036688e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.208211e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.208211e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.477862 sec -INFO: No Floating Point Exceptions have been reported - 19,055,513,200 cycles # 2.940 GHz - 46,088,548,361 instructions # 2.42 insn per cycle - 6.483409710 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.222095e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.399638e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.399638e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.980544 sec +INFO: No Floating Point Exceptions have been reported + 17,714,141,475 cycles:u # 2.954 GHz (74.92%) + 50,088,011 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.98%) + 300,941,690 stalled-cycles-backend:u # 1.70% backend cycles idle (75.04%) + 47,054,202,718 instructions:u # 2.66 insn per cycle + # 0.01 stalled cycles per insn (75.06%) + 6.069874876 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.577995e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.056243e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.056243e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.401175 sec -INFO: No Floating Point Exceptions have been reported - 12,945,161,675 cycles # 2.938 GHz - 31,621,534,754 instructions # 2.44 insn per cycle - 4.406822784 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.762520e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.209848e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.209848e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.411264 sec +INFO: No Floating Point Exceptions have been reported + 12,828,414,164 cycles:u # 2.908 GHz (75.03%) + 48,623,701 stalled-cycles-frontend:u # 0.38% frontend cycles idle (75.10%) + 491,523,625 stalled-cycles-backend:u # 3.83% backend cycles idle (75.04%) + 31,720,119,842 instructions:u # 2.47 insn per cycle + # 0.02 stalled cycles per insn (74.98%) + 4.527284897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.979178e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.760192e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.760192e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.589167 sec -INFO: No Floating Point Exceptions have been reported - 10,070,726,803 cycles # 2.802 GHz - 19,587,544,877 instructions # 1.94 insn per cycle - 3.594697986 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.467999e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.317943e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.317943e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.375478 sec +INFO: No Floating Point Exceptions have been reported + 9,665,781,968 cycles:u # 2.853 GHz (74.99%) + 49,195,440 stalled-cycles-frontend:u # 0.51% frontend cycles idle (74.98%) + 913,032,881 stalled-cycles-backend:u # 9.45% backend cycles idle (74.98%) + 19,496,960,664 instructions:u # 2.02 insn per cycle + # 0.05 stalled cycles per insn (75.00%) + 3.436785449 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.973756e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.755912e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.755912e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.601272 sec -INFO: No Floating Point Exceptions have been reported - 9,893,708,282 cycles # 2.744 GHz - 19,261,714,155 instructions # 1.95 insn per cycle - 3.606677205 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.684138e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.223088e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.223088e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.152604 sec -INFO: No Floating Point Exceptions have been reported - 8,635,892,874 cycles # 2.077 GHz - 15,755,316,929 instructions # 1.82 insn per cycle - 4.158382190 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 7e1127db04..1a1cae9db4 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,77 +1,54 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_18:05:38 -DATE: 2024-09-18_12:51:56 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.746451e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.921944e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.921944e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.223700 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 7,220,060,160 cycles # 2.915 GHz - 13,018,391,047 instructions # 1.80 insn per cycle - 2.533250665 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 6.831196e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.590388e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.590388e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.533079 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 18,064,474,817 cycles:u # 3.248 GHz (75.09%) + 111,016,051 stalled-cycles-frontend:u # 0.61% frontend cycles idle (75.13%) + 6,747,543,981 stalled-cycles-backend:u # 37.35% backend cycles idle (74.93%) + 16,621,660,063 instructions:u # 0.92 insn per cycle + # 0.41 stalled cycles per insn (74.82%) + 5.601394897 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -79,35 +56,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.004941e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.165788e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.165788e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.898046 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 20,322,657,427 cycles # 2.944 GHz - 46,321,216,193 instructions # 2.28 insn per cycle - 6.904944789 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.347971e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.546387e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.546387e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.517064 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 18,168,819,111 cycles:u # 3.282 GHz (75.00%) + 46,724,707 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.01%) + 305,030,598 stalled-cycles-backend:u # 1.68% backend cycles idle (75.02%) + 47,372,612,878 instructions:u # 2.61 insn per cycle + # 0.01 stalled cycles per insn (75.02%) + 5.541617287 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -115,33 +93,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.496333e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.925589e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.925589e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.836326 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 14,226,515,518 cycles # 2.937 GHz - 32,466,683,813 instructions # 2.28 insn per cycle - 4.843971134 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.898977e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.367969e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.367969e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.204327 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 13,584,919,540 cycles:u # 3.217 GHz (75.00%) + 50,454,529 stalled-cycles-frontend:u # 0.37% frontend cycles idle (75.01%) + 586,098,778 stalled-cycles-backend:u # 4.31% backend cycles idle (75.01%) + 32,631,269,808 instructions:u # 2.40 insn per cycle + # 0.02 stalled cycles per insn (75.00%) + 4.228292139 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -149,33 +130,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.825666e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.487837e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.487837e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.083516 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 11,316,310,914 cycles # 2.767 GHz - 20,951,601,246 instructions # 1.85 insn per cycle - 4.090897830 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.580001e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.427352e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.427352e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.333102 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,363,050,041 cycles:u # 3.091 GHz (74.98%) + 51,426,741 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.97%) + 966,849,777 stalled-cycles-backend:u # 9.33% backend cycles idle (74.95%) + 20,362,478,699 instructions:u # 1.96 insn per cycle + # 0.05 stalled cycles per insn (74.94%) + 3.357219989 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -183,80 +167,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.895357e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.603837e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.603837e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.954670 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 11,188,953,637 cycles # 2.824 GHz - 20,622,311,623 instructions # 1.84 insn per cycle - 3.962452110 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.623904e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.111036e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.111036e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.511201 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 9,933,844,941 cycles # 2.199 GHz - 16,904,875,780 instructions # 1.70 insn per cycle - 4.518707685 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index d91c4828d9..7a06736985 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_18:11:05 -DATE: 2024-09-18_13:04:11 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.531377e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.591267e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.748328e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.197443e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.883160e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.001388e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.353084 sec -INFO: No Floating Point Exceptions have been reported - 4,633,251,875 cycles # 2.904 GHz - 7,212,974,866 instructions # 1.56 insn per cycle - 1.652016166 seconds 
time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +TOTAL : 4.752319 sec +INFO: No Floating Point Exceptions have been reported + 15,508,972,207 cycles:u # 3.247 GHz (74.90%) + 53,853,359 stalled-cycles-frontend:u # 0.35% frontend cycles idle (74.91%) + 6,687,172,993 stalled-cycles-backend:u # 43.12% backend cycles idle (75.04%) + 11,593,559,431 instructions:u # 0.75 insn per cycle + # 0.58 stalled cycles per insn (75.01%) + 4.807135237 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.029394e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.199449e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.199449e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.365933e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.568505e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.568505e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 6.897530 sec -INFO: No Floating Point Exceptions have been reported - 20,162,123,319 cycles # 2.922 GHz - 46,195,009,239 instructions # 2.29 insn per cycle - 6.903032860 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.347034 sec +INFO: No Floating Point Exceptions have been reported + 17,796,682,629 cycles:u # 3.322 GHz (74.93%) + 50,746,150 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.94%) + 238,242,947 stalled-cycles-backend:u # 1.34% backend cycles idle (75.01%) + 47,050,275,770 instructions:u # 2.64 insn per cycle + # 0.01 stalled cycles per insn (75.06%) + 5.359517604 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.570184e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.049072e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.049072e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.013710e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.510635e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.510635e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.798502 sec -INFO: No Floating Point Exceptions have been reported - 14,063,092,419 cycles # 2.928 GHz - 31,626,728,543 instructions # 2.25 insn per cycle - 4.804471582 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.861663 sec +INFO: No Floating Point Exceptions have been reported + 12,667,828,509 cycles:u # 3.272 GHz (75.00%) + 51,640,119 stalled-cycles-frontend:u # 0.41% frontend cycles idle (75.00%) + 477,608,047 stalled-cycles-backend:u # 3.77% backend cycles idle (75.00%) + 31,726,703,174 instructions:u # 2.50 insn per cycle + # 0.02 stalled cycles per insn (75.00%) + 3.874086240 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.970535e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.757812e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.757812e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.716466e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.642060e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.642060e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.978190 sec -INFO: No Floating Point Exceptions have been reported - 11,201,119,802 cycles # 2.813 GHz - 19,490,103,913 instructions # 1.74 insn per cycle - 3.984105389 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) +TOTAL : 3.052112 sec +INFO: No Floating Point Exceptions have been reported + 9,828,986,051 cycles:u # 3.210 GHz (74.92%) + 51,915,511 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.81%) + 959,107,572 stalled-cycles-backend:u # 9.76% backend cycles idle (74.81%) + 19,515,562,689 instructions:u # 1.99 insn per cycle + # 0.05 stalled cycles per insn (75.04%) + 3.064343742 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.023963e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.847631e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 2.847631e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.897854 sec -INFO: No Floating Point Exceptions have been reported - 11,011,148,409 cycles # 2.821 GHz - 18,950,488,449 instructions # 1.72 insn per cycle - 3.903822013 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.727909e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.291192e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.291192e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.434907 sec -INFO: No Floating Point Exceptions have been reported - 9,769,161,551 cycles # 2.200 GHz - 15,456,644,765 instructions # 1.58 insn per cycle - 4.440874371 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 95f355ef67..832280b89b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,70 +1,50 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_18:09:14 -DATE: 2024-09-18_12:58:35 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.092066e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.598729e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.734259e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.884241 sec -INFO: No Floating Point Exceptions have been reported - 6,167,226,842 cycles # 2.911 GHz - 11,436,463,316 instructions # 1.85 insn per cycle - 2.174841291 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 7.802313e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.853145e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.971655e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.537151 sec +INFO: No Floating Point Exceptions have been reported + 17,595,795,010 cycles:u # 3.161 GHz (74.88%) + 112,097,181 stalled-cycles-frontend:u # 0.64% frontend cycles idle (74.80%) + 6,681,120,017 stalled-cycles-backend:u # 37.97% backend cycles idle (75.03%) + 16,149,336,211 instructions:u # 0.92 insn per cycle + # 0.41 stalled cycles per insn (75.14%) + 5.661018651 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -72,33 +52,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.035778e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.207383e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.207383e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.487731 sec -INFO: No Floating Point Exceptions have been reported - 19,058,569,596 cycles # 2.936 GHz - 46,087,741,277 instructions # 2.42 insn per cycle - 6.493592711 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.319894e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.513702e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.513702e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.527009 sec +INFO: No Floating Point Exceptions have been reported + 17,866,125,388 cycles:u # 3.227 GHz (75.01%) + 50,618,831 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.01%) + 308,495,653 stalled-cycles-backend:u # 1.73% backend cycles idle (75.01%) + 47,204,169,830 instructions:u # 2.64 insn per cycle + # 0.01 stalled cycles per insn (75.01%) + 5.539238754 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -106,31 +87,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.566016e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.044387e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.044387e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.436390 sec -INFO: No Floating Point Exceptions have been reported - 12,971,922,098 cycles # 2.921 GHz - 31,622,790,809 instructions # 2.44 insn per cycle - 4.442502369 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.021747e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.523645e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.523645e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.851218 sec +INFO: No Floating Point Exceptions have been reported + 12,650,891,754 cycles:u # 3.276 GHz (74.93%) + 51,204,488 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.94%) + 478,826,954 stalled-cycles-backend:u # 3.78% backend cycles idle (74.95%) + 31,826,900,534 instructions:u # 2.52 insn per cycle + # 0.02 stalled cycles per insn (74.94%) + 3.863373441 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -138,31 +122,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.978030e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.768932e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.768932e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.592451 sec -INFO: No Floating Point Exceptions have been reported - 10,115,574,971 cycles # 2.812 GHz - 19,587,420,856 instructions # 1.94 insn per cycle - 3.598300355 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.777286e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.747102e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.747102e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.002515 sec +INFO: No Floating Point Exceptions have been reported + 9,691,495,078 cycles:u # 3.217 GHz (74.98%) + 49,192,845 stalled-cycles-frontend:u # 0.51% frontend cycles idle (75.04%) + 907,793,158 stalled-cycles-backend:u # 9.37% backend cycles idle (75.04%) + 19,510,741,214 instructions:u # 2.01 insn per cycle + # 0.05 stalled cycles per insn (75.04%) + 3.014576983 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -170,76 +157,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.014830e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.827477e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 2.827477e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.531675 sec -INFO: No Floating Point Exceptions have been reported - 9,897,196,547 cycles # 2.799 GHz - 19,249,419,683 instructions # 1.94 insn per cycle - 3.537559003 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.720646e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.279247e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.279247e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.067635 sec -INFO: No Floating Point Exceptions have been reported - 8,664,270,263 cycles # 2.127 GHz - 15,755,691,110 instructions # 1.82 insn per cycle - 4.073643316 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index e73a9b015a..216a9f2843 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_17:12:00 -DATE: 2024-09-18_12:09:10 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.079594e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.670378e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.825463e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.680250 sec -INFO: No Floating Point Exceptions have been reported - 2,578,534,884 cycles # 2.821 GHz - 4,030,538,684 instructions # 1.56 insn per cycle - 0.973967444 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.463295e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.380629e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.522784e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.561438 sec +INFO: No Floating Point Exceptions have been reported + 1,323,775,686 cycles:u # 2.265 GHz (75.31%) + 2,550,426 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.27%) + 8,480,241 stalled-cycles-backend:u # 0.64% backend cycles idle (74.97%) + 2,375,555,887 instructions:u # 1.79 insn per cycle + # 0.00 stalled cycles per insn (72.80%) + 0.707689347 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165216E-002 +Relative difference = 1.0277079305077159e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.022698e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.191211e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.191211e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.567027 sec -INFO: No Floating Point Exceptions have been reported - 19,075,762,627 cycles # 2.903 GHz - 46,055,106,551 instructions # 2.41 insn per cycle - 6.572547698 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 452) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.218296e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.395918e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.395918e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.995484 sec +INFO: No Floating Point Exceptions have been reported + 17,735,182,931 cycles:u # 2.952 GHz (74.98%) + 49,633,694 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.98%) + 806,527,546 stalled-cycles-backend:u # 4.55% backend cycles idle (74.98%) + 46,692,259,990 instructions:u # 2.63 insn per cycle + # 0.02 stalled cycles per insn (74.98%) + 6.083852725 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 489) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.585711e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.070341e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.070341e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.383967 sec -INFO: No Floating Point Exceptions have been reported - 12,890,625,740 cycles # 2.937 GHz - 31,557,909,117 instructions # 2.45 insn per cycle - 4.389588631 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1648) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.807187e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.258576e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.258576e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.310768 sec +INFO: No Floating Point Exceptions have been reported + 12,550,862,557 cycles:u # 2.903 GHz (75.02%) + 50,726,873 stalled-cycles-frontend:u # 0.40% frontend cycles idle (75.02%) + 291,932,402 stalled-cycles-backend:u # 2.33% backend cycles idle (75.02%) + 31,508,960,309 instructions:u # 2.51 insn per cycle + # 0.01 stalled cycles per insn (75.02%) + 4.414876736 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1605) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.969969e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.755961e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.755961e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.604328 sec -INFO: No Floating Point Exceptions have been reported - 10,100,174,359 cycles # 2.799 GHz - 19,576,296,506 instructions # 1.94 insn per cycle - 3.609879791 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1894) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.592638e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.453585e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.453585e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.200516 sec +INFO: No Floating Point Exceptions have been reported + 9,890,103,583 cycles:u # 3.075 GHz (75.00%) + 47,965,827 stalled-cycles-frontend:u # 0.48% frontend cycles idle (74.90%) + 314,081,164 stalled-cycles-backend:u # 3.18% backend cycles idle (74.89%) + 19,346,133,777 instructions:u # 1.96 insn per cycle + # 0.02 stalled cycles per insn (74.95%) + 3.344858347 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1860) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.022206e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.841390e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.841390e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 
3.520245 sec -INFO: No Floating Point Exceptions have been reported - 9,894,539,917 cycles # 2.807 GHz - 19,271,397,768 instructions # 1.95 insn per cycle - 3.525910639 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1636) (512y: 178) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.762660e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.347769e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.347769e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.982657 sec -INFO: No Floating Point Exceptions have been reported - 8,470,289,841 cycles # 2.124 GHz - 15,587,855,124 instructions # 1.84 insn per cycle - 3.988212621 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 833) (512y: 153) (512z: 1240) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 8184b4eff2..f97660e788 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_17:55:59 -DATE: 2024-09-18_12:42:17 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.357145e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.547980e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.727026e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.689616 sec -INFO: No Floating Point Exceptions have been reported - 2,681,392,745 cycles # 2.885 GHz - 4,097,806,151 instructions # 1.53 insn per cycle - 0.986657014 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.212705e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.866236e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.984628e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.535700 sec +INFO: No Floating Point Exceptions have been reported + 1,422,688,283 cycles:u # 2.563 GHz (74.91%) + 2,445,274 stalled-cycles-frontend:u # 0.17% frontend cycles idle (76.26%) + 6,030,553 stalled-cycles-backend:u # 0.42% backend cycles idle (76.20%) + 2,382,760,221 instructions:u # 1.67 insn per cycle + # 0.00 stalled cycles per insn (74.79%) + 0.600574954 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.608983e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.060555e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.060555e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.321503 sec -INFO: No Floating Point Exceptions have been reported - 12,686,452,587 cycles # 2.933 GHz - 32,573,246,433 instructions # 2.57 insn per cycle - 4.326967751 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 281) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.911736e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.338775e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.338775e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.044684 sec +INFO: No Floating Point Exceptions have been reported + 13,050,068,497 cycles:u # 3.218 GHz (74.97%) + 50,107,237 stalled-cycles-frontend:u # 0.38% frontend cycles idle (74.95%) + 115,123,066 stalled-cycles-backend:u # 0.88% backend cycles idle (74.97%) + 36,903,248,617 instructions:u # 2.83 insn per cycle + # 0.00 stalled cycles per insn (74.97%) + 4.060765563 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 679) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.001283e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.839506e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.839506e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.560441 sec -INFO: No Floating Point Exceptions have been reported - 10,462,099,873 cycles # 2.934 GHz - 24,899,188,532 instructions # 2.38 insn per cycle - 3.566316228 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1246) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.605004e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.533754e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.533754e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.155865 sec +INFO: No Floating Point Exceptions have been reported + 10,148,435,458 cycles:u # 3.205 GHz (75.01%) + 46,634,347 stalled-cycles-frontend:u # 0.46% frontend cycles idle (74.99%) + 104,294,136 stalled-cycles-backend:u # 1.03% backend cycles idle (74.99%) + 24,440,653,533 instructions:u # 2.41 insn per cycle + # 0.00 stalled cycles per insn (74.99%) + 3.170871547 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2326) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.199006e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.213700e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.213700e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.271279 sec -INFO: No Floating Point Exceptions have been reported - 9,171,998,387 cycles # 2.800 GHz - 16,835,147,245 instructions # 1.84 insn per cycle - 3.276861848 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1599) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.188423e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.546970e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.546970e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.710085 sec +INFO: No Floating Point Exceptions have been reported + 8,663,850,993 cycles:u # 3.185 GHz (75.01%) + 51,683,213 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.01%) + 70,391,017 stalled-cycles-backend:u # 0.81% backend cycles idle (75.01%) + 16,864,309,931 instructions:u # 1.95 insn per cycle + # 0.00 stalled cycles per insn (75.01%) + 2.725041597 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2981) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.270242e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.359980e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.359980e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.184078 sec -INFO: No Floating Point Exceptions have been reported - 8,899,793,398 cycles # 2.791 GHz - 16,396,706,280 instructions # 1.84 insn per cycle - 3.189617083 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1355) (512y: 139) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.962735e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.715557e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.715557e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.614098 sec -INFO: No Floating Point Exceptions have been reported - 7,891,427,724 cycles # 2.181 GHz - 14,556,226,424 instructions # 1.84 insn per cycle - 3.619718707 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1003) (512y: 158) (512z: 946) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index a7c1b0753b..0634ce5a2b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_17:56:13 -DATE: 2024-09-18_12:42:43 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.653794e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.579157e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.778336e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.686013 sec -INFO: No Floating Point Exceptions have been reported - 2,680,206,326 cycles # 2.872 GHz - 4,167,068,379 instructions # 1.55 insn per cycle - 0.992521934 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.477604e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.411703e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.565078e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.514505 sec +INFO: No Floating Point Exceptions have been reported + 1,395,228,003 cycles:u # 2.612 GHz (74.64%) + 2,518,551 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.17%) + 5,517,893 stalled-cycles-backend:u # 0.40% backend cycles idle (75.33%) + 2,173,715,399 instructions:u # 1.56 insn per cycle + # 0.00 stalled cycles per insn (74.79%) + 0.576603498 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165216E-002 +Relative difference = 1.0277079305077159e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.093810e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.934858e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.934858e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.416949 sec -INFO: No Floating Point Exceptions have been reported - 10,012,195,167 cycles # 2.926 GHz - 25,507,793,848 instructions # 2.55 insn per cycle - 3.422575217 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 236) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.663928e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.540166e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.540166e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.099990 sec +INFO: No Floating Point Exceptions have been reported + 10,034,826,717 cycles:u # 3.226 GHz (74.92%) + 49,456,007 stalled-cycles-frontend:u # 0.49% frontend cycles idle (75.05%) + 47,625,467 stalled-cycles-backend:u # 0.47% backend cycles idle (75.05%) + 28,198,545,003 instructions:u # 2.81 insn per cycle + # 0.00 stalled cycles per insn (75.05%) + 3.115118375 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 609) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.342172e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.581913e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.581913e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.102371 sec -INFO: No Floating Point Exceptions have been reported - 9,123,975,305 cycles # 2.936 GHz - 21,542,843,128 instructions # 2.36 insn per cycle - 3.108003766 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1112) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.919225e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.110858e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.110858e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.892504 sec +INFO: No Floating Point Exceptions have been reported + 9,258,923,431 cycles:u # 3.190 GHz (74.95%) + 49,114,037 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.92%) + 57,459,537 stalled-cycles-backend:u # 0.62% backend cycles idle (74.94%) + 21,316,906,499 instructions:u # 2.30 insn per cycle + # 0.00 stalled cycles per insn (74.94%) + 2.907430612 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2070) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.389028e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.617798e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.617798e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.042023 sec -INFO: No Floating Point Exceptions have been reported - 8,587,076,543 cycles # 2.818 GHz - 15,956,957,926 instructions # 1.86 insn per cycle - 3.047668407 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1497) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.415515e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.012681e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.012681e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.582235 sec +INFO: No Floating Point Exceptions have been reported + 8,197,347,538 cycles:u # 3.162 GHz (75.01%) + 49,538,709 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.01%) + 67,424,791 stalled-cycles-backend:u # 0.82% backend cycles idle (75.01%) + 15,821,676,578 instructions:u # 1.93 insn per cycle + # 0.00 stalled cycles per insn (75.01%) + 2.597459574 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2739) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165086E-002 +Relative difference = 1.0277089447254817e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.421436e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.692453e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.692453e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.006782 sec -INFO: No Floating Point Exceptions have been reported - 8,445,737,284 cycles # 2.805 GHz - 15,563,019,384 instructions # 1.84 insn per cycle - 3.012659502 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1264) (512y: 141) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.061400e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.904070e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.904070e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.459956 sec -INFO: No Floating Point Exceptions have been reported - 7,611,248,188 cycles # 2.197 GHz - 14,286,576,836 instructions # 1.88 insn per cycle - 3.465475679 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1031) (512y: 164) (512z: 876) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index db1ecc021d..3821cdc626 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_17:12:18 -DATE: 2024-09-18_12:09:40 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.236538e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.678017e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.558515e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.582271 sec -INFO: No Floating Point Exceptions have been reported - 2,326,541,752 cycles # 2.874 GHz - 3,619,452,327 instructions # 1.56 insn per cycle - 0.866579999 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +Process 
= SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=1, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.403119e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.105240e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.280053e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 +TOTAL : 0.424406 sec +INFO: No Floating Point Exceptions have been reported + 999,360,396 cycles:u # 2.358 GHz (74.54%) + 2,562,121 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.49%) + 10,519,637 stalled-cycles-backend:u # 1.05% backend cycles idle (75.27%) + 1,936,418,482 instructions:u # 1.94 insn per cycle + # 0.01 stalled cycles per insn (73.93%) + 0.550736823 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.078772e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.274321e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.274321e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.204384 sec -INFO: No Floating Point Exceptions have been reported - 18,271,266,571 cycles # 2.943 GHz - 45,007,026,058 instructions # 2.46 insn per cycle - 6.209806202 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.590934e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.865276e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.865276e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 4.635572 sec +INFO: No Floating Point Exceptions have been reported + 15,421,260,631 cycles:u # 3.321 GHz (75.02%) + 40,826,297 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.02%) + 496,980,116 stalled-cycles-backend:u # 3.22% backend cycles idle (75.02%) + 46,999,382,944 instructions:u # 3.05 insn per cycle + # 0.01 stalled cycles per insn (75.02%) + 4.715017705 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.258213e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.443370e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.443370e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.161459 sec -INFO: No Floating Point Exceptions have been reported - 9,301,142,039 cycles # 2.938 GHz - 22,273,650,036 instructions # 2.39 insn per cycle - 3.166937253 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.115905e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.429338e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.429338e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.710652 sec +INFO: No Floating Point Exceptions have been reported + 8,665,630,615 cycles:u # 3.188 GHz (74.95%) + 39,210,982 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.99%) + 1,214,273,177 stalled-cycles-backend:u # 14.01% backend cycles idle (74.99%) + 22,397,892,524 instructions:u # 2.58 insn per cycle + # 0.05 stalled cycles per insn (74.99%) + 2.781731901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.422291e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.701347e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.701347e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.969313 sec -INFO: No Floating Point Exceptions have been reported - 8,389,284,998 cycles # 2.822 GHz - 15,752,357,337 instructions # 1.88 insn per cycle - 2.974718872 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.329043e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.854609e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.854609e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.609169 sec +INFO: No Floating Point Exceptions have been reported + 7,939,202,306 cycles:u # 3.041 GHz (75.05%) + 41,060,608 stalled-cycles-frontend:u # 0.52% frontend cycles idle (74.94%) + 1,746,192,525 stalled-cycles-backend:u # 21.99% backend cycles idle (74.89%) + 15,491,305,602 instructions:u # 1.95 insn per cycle + # 0.11 stalled cycles per insn (74.91%) + 2.721507178 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.405471e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.684326e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.684326e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.990876 sec -INFO: No Floating Point Exceptions have been reported - 8,285,038,888 cycles # 2.766 GHz - 15,588,340,357 instructions # 1.88 insn per cycle - 2.996605246 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.444926e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.740937e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.740937e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.948513 sec -INFO: No Floating Point Exceptions have been reported - 6,657,028,546 cycles # 2.254 GHz - 12,863,339,645 instructions # 1.93 insn per cycle - 2.954217512 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 47dd15a77b..c6804ffdb2 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,77 +1,54 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_18:06:00 -DATE: 2024-09-18_12:52:30 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.148525e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.888705e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.888705e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.710491 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,650,857,001 cycles # 2.904 GHz - 10,226,411,017 instructions # 1.81 insn per cycle - 2.002623091 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 7.949156e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.073898e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.073898e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 +TOTAL : 5.490726 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 17,549,834,909 cycles:u # 3.183 GHz (74.85%) + 213,060,941 stalled-cycles-frontend:u # 1.21% frontend cycles idle (74.99%) + 6,636,265,921 stalled-cycles-backend:u # 37.81% backend cycles idle (75.12%) + 16,546,926,833 instructions:u # 0.94 insn per cycle + # 0.40 stalled cycles per insn (75.11%) + 5.556605697 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -79,35 +56,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.051342e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.236619e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.236619e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.467611 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 18,975,945,343 cycles # 2.932 GHz - 45,166,614,913 instructions # 2.38 insn per cycle - 6.474019296 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.598329e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.877545e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.877545e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 4.677709 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 15,505,411,543 cycles:u # 3.307 GHz (74.87%) + 36,424,638 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.85%) + 461,953,180 stalled-cycles-backend:u # 2.98% backend cycles idle (74.95%) + 47,366,132,559 instructions:u # 3.05 insn per cycle + # 0.01 stalled cycles per insn (75.04%) + 4.694974250 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -115,33 +93,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.143329e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.199468e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.199468e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.443303 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,057,348,114 cycles # 2.916 GHz - 23,610,490,289 instructions # 2.35 insn per cycle - 3.450411330 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.042259e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.284113e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.284113e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.829756 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 9,055,489,718 cycles:u # 3.187 GHz (74.95%) + 38,127,414 stalled-cycles-frontend:u # 0.42% frontend cycles idle (74.95%) + 1,273,596,323 stalled-cycles-backend:u # 14.06% backend cycles idle (74.97%) + 23,471,932,770 instructions:u # 2.59 insn per cycle + # 0.05 stalled cycles per insn (74.97%) + 2.846916011 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -149,33 +130,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.288972e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.428534e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.428534e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.255640 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 9,181,255,557 cycles # 2.815 GHz - 16,874,424,213 instructions # 1.84 insn per cycle - 3.262739708 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.400094e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.900382e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.900382e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.615660 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 8,346,715,316 cycles:u # 3.177 GHz (74.93%) + 41,455,442 stalled-cycles-frontend:u # 0.50% frontend cycles idle (75.03%) + 1,754,761,500 stalled-cycles-backend:u # 21.02% backend cycles idle (75.03%) + 16,486,470,163 instructions:u # 1.98 insn per cycle + # 0.11 stalled cycles per insn (75.03%) + 2.632632852 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -183,80 +167,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe 
-p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.308266e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.504995e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.504995e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.233704 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 9,120,657,024 cycles # 2.815 GHz - 16,716,849,319 instructions # 1.83 insn per cycle - 3.240866405 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.329690e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.465437e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.465437e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.207876 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 7,429,892,192 cycles # 2.312 GHz - 14,072,572,968 instructions # 1.89 insn per cycle - 3.215041865 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index aa8d2ebaf9..b37392835e 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_18:11:26 -DATE: 2024-09-18_13:04:44 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.285600e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.265115e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.156209e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.210437 sec -INFO: No Floating Point Exceptions have been reported - 4,156,256,455 cycles # 2.890 GHz - 6,567,216,103 instructions # 1.58 insn per cycle - 1.494886653 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 
1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.405910e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.163696e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.346223e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371906e-02 +- 3.274477e-06 ) GeV^0 +TOTAL : 4.597944 sec +INFO: No Floating Point Exceptions have been reported + 15,023,376,445 cycles:u # 3.253 GHz (74.95%) + 154,433,581 stalled-cycles-frontend:u # 1.03% frontend cycles idle (74.89%) + 6,721,315,352 stalled-cycles-backend:u # 44.74% backend cycles idle (74.92%) + 11,325,642,994 instructions:u # 0.75 insn per cycle + # 0.59 stalled cycles per insn (75.05%) + 4.651851629 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.068119e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.263869e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.263869e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.606207e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.886415e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.886415e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.607111 sec -INFO: No Floating Point Exceptions have been reported - 19,321,982,901 cycles # 2.923 GHz - 45,195,162,918 instructions # 2.34 insn per cycle - 6.612467743 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.598258 sec +INFO: No Floating Point Exceptions have been reported + 15,285,050,760 cycles:u # 3.319 GHz (74.99%) + 40,424,777 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.98%) + 450,797,994 stalled-cycles-backend:u # 2.95% backend cycles idle (74.98%) + 47,165,248,582 instructions:u # 3.09 insn per cycle + # 0.01 stalled cycles per insn (74.98%) + 4.606838696 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = 
FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.243612e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.428241e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.428241e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.159954e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.504617e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.504617e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.520615 sec -INFO: No Floating Point Exceptions have been reported - 10,297,430,100 cycles # 2.921 GHz - 22,355,563,747 instructions # 2.17 insn per cycle - 3.526233568 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.677324 sec +INFO: No Floating Point Exceptions have been reported + 8,616,252,132 cycles:u # 3.211 GHz (74.97%) + 39,194,881 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.96%) + 1,204,691,262 stalled-cycles-backend:u # 13.98% backend cycles idle (74.96%) + 22,427,431,536 instructions:u # 2.60 insn per cycle + # 0.05 stalled cycles per insn (74.96%) + 2.685817270 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = 
FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.394598e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.672540e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.672540e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.488178e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.101916e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.101916e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.341420 sec -INFO: No Floating Point Exceptions have been reported - 9,418,657,206 cycles # 2.815 GHz - 15,664,231,235 instructions # 1.66 insn per cycle - 3.347085737 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) +TOTAL : 2.487888 sec +INFO: No Floating Point Exceptions have been reported + 7,940,271,110 cycles:u # 3.184 GHz (74.99%) + 40,701,330 stalled-cycles-frontend:u # 0.51% frontend cycles idle (74.98%) + 1,735,184,462 stalled-cycles-backend:u # 21.85% backend cycles idle (74.98%) + 15,494,245,719 instructions:u # 1.95 insn per cycle + # 0.11 stalled cycles per insn (74.98%) + 2.496752359 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.438466e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.794511e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) 
= ( 3.794511e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.303984 sec -INFO: No Floating Point Exceptions have been reported - 9,386,171,386 cycles # 2.837 GHz - 15,303,933,132 instructions # 1.63 insn per cycle - 3.309654062 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.452484e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.752423e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.752423e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.289646 sec -INFO: No Floating Point Exceptions have been reported - 7,666,750,686 cycles # 2.328 GHz - 12,574,987,911 instructions # 1.64 insn per cycle - 3.295237837 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index 981ff690e7..7010435267 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,70 +1,50 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_18:09:36 -DATE: 2024-09-18_12:59:07 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.867533e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.208256e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.038855e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.512988 sec -INFO: No Floating Point Exceptions have been reported - 5,035,172,508 cycles # 2.896 GHz - 9,178,648,119 instructions # 1.82 insn per cycle - 1.796445964 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 9.040495e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.142807e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.322695e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 +TOTAL : 5.227617 sec +INFO: No Floating Point Exceptions have been reported + 17,194,136,336 cycles:u # 3.284 GHz (74.97%) + 204,361,213 stalled-cycles-frontend:u # 1.19% frontend cycles idle (75.08%) + 5,523,861,891 stalled-cycles-backend:u # 32.13% backend cycles idle (75.01%) + 16,198,271,603 instructions:u # 0.94 insn per cycle + # 0.34 stalled cycles per insn (74.96%) + 5.279378863 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -72,33 +52,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.073817e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.267606e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.267606e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.232302 sec -INFO: No Floating Point Exceptions have been reported - 18,275,461,834 cycles # 2.931 GHz - 45,008,664,367 instructions # 2.46 insn per cycle - 6.237799317 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.616489e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.898615e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.898615e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 4.569196 sec +INFO: No Floating Point Exceptions have been reported + 15,252,237,370 cycles:u # 3.333 GHz (75.00%) + 39,887,321 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.00%) + 465,360,550 stalled-cycles-backend:u # 3.05% backend cycles idle (75.00%) + 47,141,106,292 instructions:u # 3.09 insn per cycle + # 0.01 stalled cycles per insn (75.00%) + 4.578028940 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -106,31 +87,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.243088e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.444450e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.444450e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.181850 sec -INFO: No Floating Point Exceptions have been reported - 9,350,023,781 cycles # 2.934 GHz - 22,274,333,552 instructions # 2.38 insn per cycle - 3.187507510 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.169163e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.514055e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.514055e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.673673 sec +INFO: No Floating Point Exceptions have been reported + 8,598,441,727 cycles:u # 3.208 GHz (74.93%) + 38,900,163 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.93%) + 1,168,143,641 stalled-cycles-backend:u # 13.59% backend cycles idle (74.93%) + 22,476,523,422 instructions:u # 2.61 insn per cycle + # 0.05 stalled cycles per insn (74.95%) + 2.682115448 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -138,31 +122,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.392219e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.668104e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.668104e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.999184 sec -INFO: No Floating Point Exceptions have been reported - 8,440,748,249 cycles # 2.810 GHz - 15,754,020,269 instructions # 1.87 insn per cycle - 3.004841956 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.442974e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.071928e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.071928e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.522489 sec +INFO: No Floating Point Exceptions have been reported + 8,055,832,723 cycles:u # 3.186 GHz (75.01%) + 40,025,265 stalled-cycles-frontend:u # 0.50% frontend cycles idle (75.01%) + 1,745,703,214 stalled-cycles-backend:u # 21.67% backend cycles idle (75.01%) + 15,523,601,959 instructions:u # 1.93 insn per cycle + # 0.11 stalled cycles per insn (75.01%) + 2.531311690 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -170,76 +157,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.422328e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.772097e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) 
= ( 3.772097e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.968153 sec -INFO: No Floating Point Exceptions have been reported - 8,367,700,869 cycles # 2.815 GHz - 15,588,459,242 instructions # 1.86 insn per cycle - 2.973858535 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.440851e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.737976e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.737976e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.952558 sec -INFO: No Floating Point Exceptions have been reported - 6,664,861,082 cycles # 2.254 GHz - 12,863,872,119 instructions # 1.93 insn per cycle - 2.958126027 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index 5f8c460514..f2f03e5b07 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_17:12:33 -DATE: 2024-09-18_12:10:07 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.297995e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.821835e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.125593e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.584822 sec -INFO: No Floating Point Exceptions have been reported - 2,340,511,556 cycles # 2.876 GHz - 3,573,310,904 instructions # 1.53 insn per cycle - 0.872056454 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +Process = 
SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=1, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.504883e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.545624e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.777206e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 +TOTAL : 0.411096 sec +INFO: No Floating Point Exceptions have been reported + 1,041,962,156 cycles:u # 2.427 GHz (75.90%) + 2,518,113 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.47%) + 5,083,447 stalled-cycles-backend:u # 0.49% backend cycles idle (74.88%) + 1,816,724,710 instructions:u # 1.74 insn per cycle + # 0.00 stalled cycles per insn (74.60%) + 0.532974387 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.074691e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.268219e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.268219e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.225057 sec -INFO: No Floating Point Exceptions have been reported - 18,266,994,357 cycles # 2.932 GHz - 44,980,008,303 instructions # 2.46 insn per cycle - 6.230608513 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 397) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.529116e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.798032e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.798032e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 4.841556 sec +INFO: No Floating Point Exceptions have been reported + 15,106,076,884 cycles:u # 3.115 GHz (74.97%) + 38,349,519 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.86%) + 708,992,799 stalled-cycles-backend:u # 4.69% backend cycles idle (74.90%) + 46,302,874,988 instructions:u # 3.07 insn per cycle + # 0.02 stalled cycles per insn (75.05%) + 4.891200103 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.255829e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.437463e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.437463e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.162961 sec -INFO: No Floating Point Exceptions have been reported - 9,315,618,309 cycles # 2.941 GHz - 22,235,168,853 instructions # 2.39 insn per cycle - 3.168519289 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1935) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.030131e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.362311e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.362311e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.795560 sec +INFO: No Floating Point Exceptions have been reported + 8,571,133,086 cycles:u # 3.057 GHz (74.89%) + 37,424,004 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.92%) + 1,114,870,979 stalled-cycles-backend:u # 13.01% backend cycles idle (74.93%) + 22,409,821,442 instructions:u # 2.61 insn per cycle + # 0.05 stalled cycles per insn (75.05%) + 2.870522320 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1874) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.414375e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.703911e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.703911e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.977811 sec -INFO: No Floating Point Exceptions have been reported - 8,430,687,956 cycles # 2.827 GHz - 15,749,443,583 instructions # 1.87 insn per cycle - 2.983247205 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2540) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.334828e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.876181e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.876181e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.600692 sec +INFO: No Floating Point Exceptions have been reported + 7,927,308,224 cycles:u # 3.039 GHz (74.88%) + 41,121,417 stalled-cycles-frontend:u # 0.52% frontend cycles idle (74.88%) + 1,892,251,873 stalled-cycles-backend:u # 23.87% backend cycles idle (74.96%) + 15,423,363,489 instructions:u # 1.95 insn per cycle + # 0.12 stalled cycles per insn (75.11%) + 2.681605545 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2501) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.463260e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.781163e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.781163e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.924999 sec -INFO: No Floating Point Exceptions have been reported - 8,268,651,321 cycles # 2.823 GHz - 15,583,986,651 instructions # 1.88 insn per cycle - 2.930392056 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2438) (512y: 10) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.442819e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.745195e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.745195e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.947951 sec -INFO: No Floating Point Exceptions have been reported - 6,669,419,569 cycles # 2.259 GHz - 12,841,335,089 instructions # 1.93 insn per cycle - 2.953404356 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1669) (512y: 16) (512z: 1427) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052564145764E-002 -Relative difference = 1.9988585667912256e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 438f6c4f2f..7d72e2393f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_17:56:26 -DATE: 2024-09-18_12:43:08 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.248809e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.661013e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.608831e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.584963 sec -INFO: No Floating Point Exceptions have been reported - 2,341,003,807 cycles # 2.871 GHz - 3,637,581,249 instructions # 1.55 insn per cycle - 0.872273356 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +Process 
= SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=1, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.417071e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.141548e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.358712e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 +TOTAL : 0.395915 sec +INFO: No Floating Point Exceptions have been reported + 1,005,260,750 cycles:u # 2.443 GHz (74.54%) + 2,432,216 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.04%) + 5,446,718 stalled-cycles-backend:u # 0.54% backend cycles idle (74.90%) + 2,051,415,012 instructions:u # 2.04 insn per cycle + # 0.00 stalled cycles per insn (74.29%) + 0.454850741 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.610499e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.089750e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.089750e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 4.278573 sec -INFO: No Floating Point Exceptions have been reported - 12,205,449,516 cycles # 2.850 GHz - 32,295,858,353 instructions # 2.65 insn per cycle - 4.284066796 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 290) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.168008e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.710339e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.710339e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 3.581583 sec +INFO: No Floating Point Exceptions have been reported + 11,796,627,952 cycles:u # 3.287 GHz (74.92%) + 38,901,660 stalled-cycles-frontend:u # 0.33% frontend cycles idle (75.01%) + 1,791,140,796 stalled-cycles-backend:u # 15.18% backend cycles idle (75.03%) + 37,547,770,471 instructions:u # 3.18 insn per cycle + # 0.05 stalled cycles per insn (75.03%) + 3.593660266 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 705) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039840314887E-002 -Relative difference = 1.244813035273009e-08 +Avg ME (F77/C++) = 1.2828039543819614E-002 +Relative difference = 3.5561191488957804e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] 
[inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.650495e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.446725e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.446725e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.750577 sec -INFO: No Floating Point Exceptions have been reported - 8,071,356,692 cycles # 2.929 GHz - 18,687,842,971 instructions # 2.32 insn per cycle - 2.756173554 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1534) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.868157e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.114515e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.114515e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.310192 sec +INFO: No Floating Point Exceptions have been reported + 7,377,944,439 cycles:u # 3.184 GHz (74.69%) + 40,246,631 stalled-cycles-frontend:u # 0.55% frontend cycles idle (74.77%) + 212,420,701 stalled-cycles-backend:u # 2.88% backend cycles idle (75.08%) + 18,444,683,713 instructions:u # 2.50 insn per cycle + # 0.01 stalled cycles per insn (75.14%) + 2.321918385 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2784) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039283704129E-002 -Relative difference = 5.583829420356249e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.785833e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.615036e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.615036e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.627067 sec -INFO: No Floating Point Exceptions have been reported - 7,450,918,918 cycles # 2.831 GHz - 14,249,285,643 instructions # 1.91 insn per cycle - 2.632635594 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2234) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.852910e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.891146e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.891146e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.311762 sec +INFO: No Floating Point Exceptions have been reported + 7,383,263,785 cycles:u # 3.184 GHz (74.86%) + 43,163,961 stalled-cycles-frontend:u # 0.58% frontend cycles idle (74.84%) + 844,232,815 stalled-cycles-backend:u # 11.43% backend cycles idle (74.86%) + 14,155,277,183 instructions:u # 1.92 insn per cycle + # 0.06 stalled cycles per insn (75.00%) + 2.323712564 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4304) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053244447801E-002 -Relative difference = 2.5291823782248813e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.828862e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.718189e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
4.718189e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.597300 sec -INFO: No Floating Point Exceptions have been reported - 7,335,966,912 cycles # 2.820 GHz - 13,949,163,288 instructions # 1.90 insn per cycle - 2.602858413 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2087) (512y: 3) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053244447801E-002 -Relative difference = 2.5291823782248813e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.491639e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.833175e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.833175e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.900511 sec -INFO: No Floating Point Exceptions have been reported - 6,563,891,996 cycles # 2.259 GHz - 13,436,075,613 instructions # 2.05 insn per cycle - 2.906157600 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2073) (512y: 1) (512z: 1201) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052562326775E-002 -Relative difference = 1.997440588685788e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 2bd01da79a..62e2a08489 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_17:56:38 -DATE: 2024-09-18_12:43:31 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.260194e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.691839e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.932675e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.585616 sec -INFO: No Floating Point Exceptions have been reported - 2,337,485,665 cycles # 2.875 GHz - 3,652,863,320 instructions # 1.56 insn per cycle - 0.871732359 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +Process = 
SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=1, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.521380e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.682628e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.929767e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 +TOTAL : 0.395671 sec +INFO: No Floating Point Exceptions have been reported + 984,202,112 cycles:u # 2.388 GHz (75.12%) + 2,551,855 stalled-cycles-frontend:u # 0.26% frontend cycles idle (73.43%) + 8,367,573 stalled-cycles-backend:u # 0.85% backend cycles idle (74.31%) + 1,973,107,317 instructions:u # 2.00 insn per cycle + # 0.00 stalled cycles per insn (75.73%) + 0.452530501 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.208067e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.235106e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.235106e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.219491 sec -INFO: No Floating Point Exceptions have been reported - 9,405,085,609 cycles # 2.917 GHz - 25,703,807,777 instructions # 2.73 insn per cycle - 3.224847546 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 243) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.049505e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.245124e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.245124e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 2.745841 sec +INFO: No Floating Point Exceptions have been reported + 8,844,760,560 cycles:u # 3.213 GHz (75.01%) + 36,071,629 stalled-cycles-frontend:u # 0.41% frontend cycles idle (75.01%) + 30,726,186 stalled-cycles-backend:u # 0.35% backend cycles idle (75.01%) + 28,561,504,774 instructions:u # 3.23 insn per cycle + # 0.00 stalled cycles per insn (75.01%) + 2.757605366 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039838495897E-002 -Relative difference = 1.2589928273811243e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] 
[inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.972603e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.428852e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.428852e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.491785 sec -INFO: No Floating Point Exceptions have been reported - 7,313,494,275 cycles # 2.930 GHz - 16,767,205,281 instructions # 2.29 insn per cycle - 2.497135576 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1311) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.237815e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.103867e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.103867e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.165380 sec +INFO: No Floating Point Exceptions have been reported + 6,859,752,406 cycles:u # 3.158 GHz (74.99%) + 37,632,318 stalled-cycles-frontend:u # 0.55% frontend cycles idle (74.96%) + 31,490,359 stalled-cycles-backend:u # 0.46% backend cycles idle (74.96%) + 16,583,709,220 instructions:u # 2.42 insn per cycle + # 0.00 stalled cycles per insn (74.96%) + 2.177299928 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2423) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.941057e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.047750e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.047750e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.511970 sec -INFO: No Floating Point Exceptions have been reported - 7,127,612,921 cycles # 2.833 GHz - 13,657,719,583 instructions # 1.92 insn per cycle - 2.517264213 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2067) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.055916e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.396389e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.396389e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.226900 sec +INFO: No Floating Point Exceptions have been reported + 7,078,990,378 cycles:u # 3.169 GHz (74.96%) + 42,150,010 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.93%) + 690,079,947 stalled-cycles-backend:u # 9.75% backend cycles idle (74.93%) + 13,524,313,161 instructions:u # 1.91 insn per cycle + # 0.05 stalled cycles per insn (74.93%) + 2.238496994 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3983) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053220800939E-002 -Relative difference = 2.5107486628541925e-07 +Avg ME (F77/C++) = 1.2828053349949187E-002 +Relative difference = 2.611425108340261e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.994854e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.186874e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
5.186874e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.472132 sec -INFO: No Floating Point Exceptions have been reported - 7,033,406,697 cycles # 2.840 GHz - 13,451,133,295 instructions # 1.91 insn per cycle - 2.477643200 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1935) (512y: 7) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053220800939E-002 -Relative difference = 2.5107486628541925e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.610829e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.126124e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.126124e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.783843 sec -INFO: No Floating Point Exceptions have been reported - 6,358,284,694 cycles # 2.280 GHz - 13,173,247,957 instructions # 2.07 insn per cycle - 2.789438831 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2039) (512y: 2) (512z: 1081) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052536860923E-002 -Relative difference = 1.977588895209662e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 041f4e9efd..3a3e5d3344 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_17:12:47 -DATE: 2024-09-18_12:10:34 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.877042e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.647728e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.852998e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.677390 sec -INFO: No Floating Point Exceptions have been reported - 2,627,954,813 cycles # 2.876 GHz - 4,055,520,615 instructions # 1.54 insn per cycle - 0.972709824 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.207835e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.857313e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.975718e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.562002 sec +INFO: No Floating Point Exceptions have been reported + 1,414,994,194 cycles:u # 2.507 GHz (75.09%) + 2,475,501 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.82%) + 6,282,643 stalled-cycles-backend:u # 0.44% backend cycles idle (75.76%) + 2,186,124,848 instructions:u # 1.54 insn per cycle + # 0.00 stalled cycles per insn (75.94%) + 0.726771283 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590279E-002 -Relative difference = 7.671454200650844e-09 +Avg ME (F77/GPU) = 1.2828039901590281E-002 +Relative difference = 7.67145406542181e-09 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.020205e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.187124e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.187124e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.577075 sec -INFO: No Floating Point Exceptions have been reported - 19,371,933,844 cycles # 2.944 GHz - 46,278,733,907 instructions # 2.39 insn per cycle - 6.582537613 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.326045e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.520190e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.520190e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.516829 sec +INFO: No Floating Point Exceptions have been reported + 17,688,420,445 cycles:u # 3.199 GHz (75.04%) + 49,553,059 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.99%) + 144,397,969 stalled-cycles-backend:u # 0.82% backend cycles idle (74.97%) + 47,436,113,566 instructions:u # 2.68 insn per cycle + # 0.00 stalled cycles per insn (74.99%) + 5.615037353 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 454) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.635520e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.155996e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.155996e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.262104 sec -INFO: No Floating Point Exceptions have been reported - 12,531,950,606 cycles # 2.937 GHz - 31,465,132,198 instructions # 2.51 insn per cycle - 4.267832274 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1731) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.960845e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.461824e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.461824e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.988117 sec +INFO: No Floating Point Exceptions have been reported + 12,462,936,271 cycles:u # 3.115 GHz (74.95%) + 49,873,898 stalled-cycles-frontend:u # 0.40% frontend cycles idle (75.01%) + 1,140,737,344 stalled-cycles-backend:u # 9.15% backend cycles idle (75.05%) + 31,401,629,715 instructions:u # 2.52 insn per cycle + # 0.04 stalled cycles per insn (75.01%) + 4.108231429 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1704) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.976062e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.756066e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.756066e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.592683 sec -INFO: No Floating Point Exceptions have been reported - 10,114,837,946 cycles # 2.812 GHz - 19,479,113,850 instructions # 1.93 insn per cycle - 3.598394582 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2045) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.603945e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.497171e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497171e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.218211 sec +INFO: No Floating Point Exceptions have been reported + 9,777,394,640 cycles:u # 3.043 GHz (75.02%) + 53,114,088 stalled-cycles-frontend:u # 0.54% frontend cycles idle (75.10%) + 278,323,224 stalled-cycles-backend:u # 2.85% backend cycles idle (75.10%) + 19,375,783,973 instructions:u # 1.98 insn per cycle + # 0.01 stalled cycles per insn (75.10%) + 3.314291243 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2054) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.011048e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.815376e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.815376e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 
3.535103 sec -INFO: No Floating Point Exceptions have been reported - 9,996,837,570 cycles # 2.824 GHz - 19,291,566,393 instructions # 1.93 insn per cycle - 3.540686440 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1799) (512y: 188) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.782393e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.383775e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.383775e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.939537 sec -INFO: No Floating Point Exceptions have been reported - 8,379,017,732 cycles # 2.125 GHz - 15,108,594,606 instructions # 1.80 insn per cycle - 3.945372714 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 966) (512y: 154) (512z: 1330) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index 63e5511d98..60b29b29bf 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-09-18_17:13:05 -DATE: 2024-09-18_12:11:04 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.941580e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.659467e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.829628e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.674748 sec -INFO: No Floating Point Exceptions have been reported - 2,621,919,128 cycles # 2.880 GHz - 4,081,332,751 instructions # 1.56 insn per cycle - 0.969735396 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.524426e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.496465e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.642073e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.533662 sec +INFO: No Floating Point Exceptions have been reported + 1,327,312,195 cycles:u # 2.382 GHz (75.23%) + 2,544,054 stalled-cycles-frontend:u # 0.19% frontend cycles idle (76.56%) + 6,691,894 stalled-cycles-backend:u # 0.50% backend cycles idle (76.41%) + 2,218,110,852 instructions:u # 1.67 insn per cycle + # 0.00 stalled cycles per insn (74.79%) + 0.711069615 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590279E-002 -Relative difference = 7.671454200650844e-09 +Avg ME (F77/GPU) = 1.2828039901590284E-002 +Relative difference = 7.67145379496374e-09 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.022324e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.188868e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.188868e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.564992 sec -INFO: No Floating Point Exceptions have been reported - 19,266,332,416 cycles # 2.933 GHz - 46,212,690,278 instructions # 2.40 insn per cycle - 6.570664425 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.320736e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.517152e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.517152e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.542087 sec +INFO: No Floating Point Exceptions have been reported + 17,673,617,486 cycles:u # 3.182 GHz (74.96%) + 49,446,270 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.95%) + 788,224,990 stalled-cycles-backend:u # 4.46% backend cycles idle (74.96%) + 46,948,710,948 instructions:u # 2.66 insn per cycle + # 0.02 stalled cycles per insn (75.04%) + 5.631949639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 471) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.631635e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.147723e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.147723e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.273260 sec -INFO: No Floating Point Exceptions have been reported - 12,565,193,084 cycles # 2.937 GHz - 31,464,303,429 instructions # 2.50 insn per cycle - 4.278983280 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1724) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.087278e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.649501e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.649501e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.767539 sec +INFO: No Floating Point Exceptions have been reported + 12,175,180,167 cycles:u # 3.221 GHz (75.03%) + 49,757,175 stalled-cycles-frontend:u # 0.41% frontend cycles idle (75.03%) + 322,071,355 stalled-cycles-backend:u # 2.65% backend cycles idle (75.03%) + 31,096,570,929 instructions:u # 2.55 insn per cycle + # 0.01 stalled cycles per insn (75.03%) + 3.866707373 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1654) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.965569e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.737055e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.737055e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.608853 sec -INFO: No Floating Point Exceptions have been reported - 10,149,451,908 cycles # 2.809 GHz - 19,494,245,478 instructions # 1.92 insn per cycle - 3.614638314 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2036) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.775239e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.738743e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.738743e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.006678 sec +INFO: No Floating Point Exceptions have been reported + 9,682,545,011 cycles:u # 3.207 GHz (75.02%) + 51,386,803 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.90%) + 648,242,102 stalled-cycles-backend:u # 6.69% backend cycles idle (74.86%) + 19,228,837,022 instructions:u # 1.99 insn per cycle + # 0.03 stalled cycles per insn (74.99%) + 3.117508436 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2008) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.020584e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.826510e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.826510e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 
3.523442 sec -INFO: No Floating Point Exceptions have been reported - 9,922,226,767 cycles # 2.813 GHz - 19,194,396,105 instructions # 1.93 insn per cycle - 3.529032291 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1766) (512y: 191) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.850816e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.505094e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.505094e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.808968 sec -INFO: No Floating Point Exceptions have been reported - 8,221,926,837 cycles # 2.156 GHz - 14,966,457,412 instructions # 1.82 insn per cycle - 3.814643788 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 959) (512y: 155) (512z: 1296) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index d77862b8c7..ab6dc5f81d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_17:13:22 -DATE: 2024-09-18_12:11:34 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.432691e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.350673e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.001727e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.536474 sec -INFO: No Floating Point Exceptions have been reported - 2,210,506,804 cycles # 2.873 GHz - 3,172,337,100 instructions # 1.44 insn per cycle - 0.829286366 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 
214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.934931e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.471034e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.491919e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 +TOTAL : 0.419804 sec +INFO: No Floating Point Exceptions have been reported + 996,714,034 cycles:u # 2.341 GHz (76.67%) + 2,409,644 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.26%) + 6,076,059 stalled-cycles-backend:u # 0.61% backend cycles idle (74.79%) + 1,539,514,328 instructions:u # 1.54 insn per cycle + # 0.00 stalled cycles per insn (74.46%) + 0.546995956 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.830003e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.876741e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.876741e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.837147 sec -INFO: No Floating Point Exceptions have been reported - 17,232,906,357 cycles # 2.950 GHz - 45,930,941,627 instructions # 2.67 insn per cycle - 5.842851386 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.565193e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.630775e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.630775e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.274033 sec +INFO: No Floating Point Exceptions have been reported + 14,472,479,383 cycles:u # 3.376 GHz (75.09%) + 8,913,402 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.02%) + 3,647,031,319 stalled-cycles-backend:u # 25.20% backend cycles idle (75.00%) + 45,489,617,307 instructions:u # 3.14 insn per cycle + # 0.08 stalled cycles per insn (75.00%) + 4.346418121 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.213968e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.373677e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.373677e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.369805 sec -INFO: No Floating Point Exceptions have been reported - 9,944,028,092 cycles # 2.947 GHz - 27,848,243,801 instructions # 2.80 insn per cycle - 3.375396234 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.282132e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.475021e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.475021e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.634395 sec +INFO: No Floating Point Exceptions have been reported + 8,806,052,324 cycles:u # 3.326 GHz (74.93%) + 7,897,470 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.95%) + 2,700,862,590 stalled-cycles-backend:u # 30.67% backend cycles idle (74.95%) + 27,875,711,410 instructions:u # 3.17 insn per cycle + # 0.10 stalled cycles per insn (74.93%) + 2.706017261 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.005348e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.393032e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.393032e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.203017 sec -INFO: No Floating Point Exceptions have been reported - 6,092,356,881 cycles # 2.759 GHz - 12,580,147,933 instructions # 2.06 insn per cycle - 2.208781826 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.238804e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.769999e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.769999e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.630385 sec +INFO: No Floating Point Exceptions have been reported + 5,391,944,056 cycles:u # 3.281 GHz (74.52%) + 8,358,752 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.82%) + 121,036,170 stalled-cycles-backend:u # 2.24% backend cycles idle (75.06%) + 12,304,338,508 instructions:u # 2.28 insn per cycle + # 0.01 stalled cycles per insn (75.18%) + 1.717953566 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.533405e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.010418e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.010418e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.002945 sec -INFO: No Floating Point 
Exceptions have been reported - 5,570,120,100 cycles # 2.774 GHz - 12,019,792,186 instructions # 2.16 insn per cycle - 2.008867487 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.539179e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.725857e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.725857e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.069457 sec -INFO: No Floating Point Exceptions have been reported - 5,709,813,977 cycles # 1.857 GHz - 8,292,916,903 instructions # 1.45 insn per cycle - 3.075340516 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index ac7eb7abb8..0f2ff73fb0 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,77 +1,54 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_18:06:19 -DATE: 2024-09-18_12:52:59 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.492890e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.985153e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.985153e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.825573 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,089,630,348 cycles # 2.884 GHz - 4,704,003,879 instructions # 1.52 insn per cycle - 1.129956624 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.938623e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.960064e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.960064e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.244964 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,763,093,175 cycles:u # 2.951 GHz (74.98%) + 21,833,946 stalled-cycles-frontend:u # 0.58% frontend cycles idle (74.92%) + 1,143,029,876 stalled-cycles-backend:u # 30.37% backend cycles idle (74.71%) + 3,908,529,958 instructions:u # 1.04 insn per cycle + # 0.29 stalled cycles per insn (74.72%) + 1.316004936 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -79,35 +56,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.810470e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.856841e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.856841e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.979891 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 17,636,224,981 cycles # 2.947 GHz - 46,002,491,255 instructions # 2.61 insn per cycle - 5.986641580 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.558947e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.636556e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.636556e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.370599 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 14,698,313,294 cycles:u # 3.347 GHz (74.93%) + 7,113,957 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.02%) + 3,662,135,115 stalled-cycles-backend:u # 24.92% backend cycles idle (75.04%) + 45,571,381,654 instructions:u # 3.10 insn per cycle + # 0.08 stalled cycles per insn (75.06%) + 4.396217494 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -115,33 +93,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.162709e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.318081e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.318081e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.508343 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,301,947,786 cycles # 2.931 GHz - 28,031,926,381 instructions # 2.72 insn per cycle - 3.516023780 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.249503e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.443162e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.443162e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.741321 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 9,003,374,826 cycles:u # 3.261 GHz (74.86%) + 8,607,866 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.82%) + 2,678,543,116 stalled-cycles-backend:u # 29.75% backend cycles idle (74.97%) + 27,795,856,321 instructions:u # 3.09 insn per cycle + # 0.10 stalled cycles per insn (75.09%) + 2.766131127 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -149,33 +130,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.911481e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.286736e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.286736e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.328474 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,480,879,664 cycles # 2.775 GHz - 12,869,228,758 instructions # 1.99 insn per cycle - 2.336129053 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.176793e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.686997e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.686997e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.730140 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,523,660,055 cycles:u # 3.156 GHz (74.91%) + 7,829,261 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.87%) + 124,702,269 stalled-cycles-backend:u # 2.26% backend cycles idle (74.87%) + 12,548,851,516 instructions:u # 2.27 insn per cycle + # 0.01 stalled cycles per insn (74.87%) + 1.755657916 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -183,80 +167,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.391900e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.844641e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.844641e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.137350 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,935,186,233 cycles # 2.768 GHz - 12,309,185,637 instructions # 2.07 insn per cycle - 2.144981542 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.478793e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.660239e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.660239e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.208412 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,086,695,352 cycles # 1.893 GHz - 8,539,357,346 instructions # 1.40 insn per cycle - 3.215882461 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 43a1422029..4213b36877 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_18:11:43 -DATE: 2024-09-18_13:05:13 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.294862e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.316742e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.978216e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.937991e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.510467e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.532029e+07 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.632097 sec -INFO: No Floating Point Exceptions have been reported - 2,509,027,611 cycles # 2.881 GHz - 3,623,648,413 instructions # 1.44 insn per cycle - 0.928416005 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +TOTAL : 1.108240 sec +INFO: No Floating Point Exceptions have been reported + 3,284,871,136 cycles:u # 2.959 GHz (74.89%) + 10,892,051 stalled-cycles-frontend:u # 0.33% frontend cycles idle (74.92%) + 1,128,730,546 stalled-cycles-backend:u # 34.36% backend cycles idle (74.43%) + 3,078,096,208 instructions:u # 0.94 insn per cycle + # 0.37 stalled cycles per insn (74.73%) + 1.167524787 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.823170e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.870043e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.870043e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.580637e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.643728e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.643728e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.922002 sec -INFO: No Floating Point Exceptions have been reported - 17,445,049,338 cycles # 2.943 GHz - 45,950,504,380 instructions # 2.63 insn per cycle - 5.927754556 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.248465 sec +INFO: No Floating Point Exceptions have been reported + 14,464,635,032 cycles:u # 3.396 GHz (75.03%) + 8,722,841 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.02%) + 3,642,256,779 stalled-cycles-backend:u # 25.18% backend cycles idle (75.02%) + 45,553,728,102 instructions:u # 3.15 insn per cycle + # 0.08 stalled cycles per insn (75.02%) + 4.261541371 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 
3.206424e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.368310e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.368310e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.199915e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.383224e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.383224e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.440040 sec -INFO: No Floating Point Exceptions have been reported - 10,134,263,801 cycles # 2.942 GHz - 27,846,437,463 instructions # 2.75 insn per cycle - 3.446069209 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.678911 sec +INFO: No Floating Point Exceptions have been reported + 9,019,928,220 cycles:u # 3.353 GHz (75.03%) + 9,261,975 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.03%) + 2,696,360,899 stalled-cycles-backend:u # 29.89% backend cycles idle (75.02%) + 27,740,633,647 instructions:u # 3.08 insn per cycle + # 0.10 stalled cycles per insn (75.02%) + 2.692188557 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 
4.972916e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.355947e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.355947e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.285364e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.823103e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.823103e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.278924 sec -INFO: No Floating Point Exceptions have been reported - 6,293,574,887 cycles # 2.756 GHz - 12,563,410,868 instructions # 2.00 insn per cycle - 2.284852020 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) +TOTAL : 1.624112 sec +INFO: No Floating Point Exceptions have been reported + 5,332,952,658 cycles:u # 3.261 GHz (75.00%) + 8,043,121 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.05%) + 105,121,534 stalled-cycles-backend:u # 1.97% backend cycles idle (75.05%) + 12,312,123,293 instructions:u # 2.31 insn per cycle + # 0.01 stalled cycles per insn (75.05%) + 1.637347721 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.484228e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.952695e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.952695e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.082886 sec -INFO: No 
Floating Point Exceptions have been reported - 5,796,540,715 cycles # 2.776 GHz - 11,970,685,605 instructions # 2.07 insn per cycle - 2.088838177 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.533537e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.719277e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.719277e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.139756 sec -INFO: No Floating Point Exceptions have been reported - 5,897,468,368 cycles # 1.875 GHz - 8,242,833,828 instructions # 1.40 insn per cycle - 3.145931095 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 06cd2419c8..ac29a8b745 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,70 +1,50 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_18:09:54 -DATE: 2024-09-18_12:59:34 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.799214e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.349130e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.974876e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.720471 sec -INFO: No Floating Point Exceptions have been reported - 2,751,096,444 cycles # 2.885 GHz - 4,339,626,159 instructions # 1.58 insn per cycle - 1.011336555 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 6.803304e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.473400e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.494865e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.202087 sec +INFO: No Floating Point Exceptions have been reported + 3,619,095,250 cycles:u # 2.982 GHz (75.32%) + 21,746,240 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.63%) + 1,112,138,546 stalled-cycles-backend:u # 30.73% backend cycles idle (75.31%) + 3,845,495,935 instructions:u # 1.06 insn per cycle + # 0.29 stalled cycles per insn (74.68%) + 1.265347938 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -72,33 +52,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
-Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.816637e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.863801e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.863801e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.879265 sec -INFO: No Floating Point Exceptions have been reported - 17,261,479,917 cycles # 2.934 GHz - 45,935,121,768 instructions # 2.66 insn per cycle - 5.884988360 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.555412e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.618727e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.618727e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.291123 sec +INFO: No Floating Point Exceptions have been reported + 14,460,345,395 cycles:u # 3.361 GHz (74.92%) + 9,128,081 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.90%) + 3,671,357,888 stalled-cycles-backend:u # 25.39% backend cycles idle (75.00%) + 45,435,832,585 instructions:u # 3.14 insn per cycle + # 0.08 stalled cycles per insn (75.08%) + 4.304537317 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -106,31 +87,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': 
SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.202828e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.362707e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.362707e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.382024 sec -INFO: No Floating Point Exceptions have been reported - 9,945,427,320 cycles # 2.936 GHz - 27,847,352,314 instructions # 2.80 insn per cycle - 3.387994978 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.287633e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.481280e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.481280e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.630521 sec +INFO: No Floating Point Exceptions have been reported + 8,794,046,271 cycles:u # 3.330 GHz (74.89%) + 7,806,877 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.89%) + 2,687,178,672 stalled-cycles-backend:u # 30.56% backend cycles idle (74.86%) + 27,867,079,455 instructions:u # 3.17 insn per cycle + # 0.10 stalled cycles per insn (74.98%) + 2.643204398 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -138,31 +122,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.949448e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.331919e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.331919e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.228024 sec -INFO: No Floating Point Exceptions have been reported - 6,117,137,090 cycles # 2.739 GHz - 12,580,569,234 instructions # 2.06 insn per cycle - 2.234097878 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.253032e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.782725e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.782725e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.628819 sec +INFO: No Floating Point Exceptions have been reported + 5,370,533,081 cycles:u # 3.276 GHz (74.73%) + 8,299,845 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.98%) + 95,736,243 stalled-cycles-backend:u # 1.78% backend cycles idle (75.12%) + 12,295,237,724 instructions:u # 2.29 insn per cycle + # 0.01 stalled cycles per insn (75.12%) + 1.641761550 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -170,76 +157,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.342003e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.785664e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.785664e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.072519 sec -INFO: No 
Floating Point Exceptions have been reported - 5,591,470,515 cycles # 2.691 GHz - 12,020,476,993 instructions # 2.15 insn per cycle - 2.078517041 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.530876e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.717281e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.717281e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.077997 sec -INFO: No Floating Point Exceptions have been reported - 5,702,073,376 cycles # 1.850 GHz - 8,294,780,221 instructions # 1.45 insn per cycle - 3.083993360 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index a4f203143e..7b8f0592a7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_17:13:35 -DATE: 2024-09-18_12:11:59 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.820817e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.978279e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.339111e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.700005 sec -INFO: No Floating Point Exceptions have been reported - 2,762,648,255 cycles # 2.857 GHz - 3,086,101,973 instructions # 1.12 insn per cycle - 1.026825767 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.966399e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.504545e+07 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.526121e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 +TOTAL : 0.411007 sec +INFO: No Floating Point Exceptions have been reported + 955,029,396 cycles:u # 2.188 GHz (74.48%) + 2,514,775 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.03%) + 5,592,491 stalled-cycles-backend:u # 0.59% backend cycles idle (74.83%) + 1,547,862,874 instructions:u # 1.62 insn per cycle + # 0.00 stalled cycles per insn (75.44%) + 0.575051879 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.875216e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.924982e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.924982e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.700425 sec -INFO: No Floating Point Exceptions have been reported - 16,757,702,666 cycles # 2.937 GHz - 44,923,641,547 instructions # 2.68 insn per cycle - 5.706326125 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.508523e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.572646e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.572646e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.371058 sec +INFO: No Floating Point Exceptions have been reported + 14,128,475,872 cycles:u # 3.222 GHz (75.00%) + 9,065,194 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) + 289,306,543 stalled-cycles-backend:u # 2.05% backend cycles idle (75.01%) + 44,427,266,143 instructions:u # 3.14 insn per cycle + # 0.01 stalled cycles per insn (75.01%) + 4.459993433 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.370762e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.546946e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.546946e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.219225 sec -INFO: No Floating Point Exceptions have been reported - 9,494,791,570 cycles # 2.945 GHz - 26,687,379,503 instructions # 2.81 insn per cycle - 3.225069589 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2327) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.430399e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.647944e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.647944e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.562854 sec +INFO: No Floating Point Exceptions have been reported + 8,369,699,777 cycles:u # 3.249 GHz (74.89%) + 9,515,612 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.87%) + 671,847,691 stalled-cycles-backend:u # 8.03% backend cycles idle (74.85%) + 26,789,411,251 instructions:u # 3.20 insn per cycle + # 0.03 stalled cycles per insn (74.96%) + 2.664751352 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2266) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.607569e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.929909e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.929909e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.383333 sec -INFO: No Floating Point Exceptions have been reported - 6,604,949,302 cycles # 2.766 GHz - 14,119,001,234 instructions # 2.14 insn per cycle - 2.388928721 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2711) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.526556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.948550e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.948550e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.788592 sec +INFO: No Floating Point Exceptions have been reported + 5,941,360,097 cycles:u # 3.298 GHz (74.70%) + 9,310,250 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.92%) + 1,408,571,969 stalled-cycles-backend:u # 23.71% backend cycles idle (75.14%) + 14,136,602,484 instructions:u # 2.38 insn per cycle + # 0.10 stalled cycles per insn (75.14%) + 1.845020820 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.803756e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.157173e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.157173e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.289999 sec -INFO: No Floating Point 
Exceptions have been reported - 6,348,634,731 cycles # 2.767 GHz - 13,715,767,912 instructions # 2.16 insn per cycle - 2.295499005 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 298) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.387276e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.557456e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.557456e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.203296 sec -INFO: No Floating Point Exceptions have been reported - 5,911,433,799 cycles # 1.843 GHz - 10,058,967,230 instructions # 1.70 insn per cycle - 3.209029605 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1273) (512y: 208) (512z: 1988) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 797e37fdb1..a9445d3e36 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_17:56:49 -DATE: 2024-09-18_12:43:53 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.310192e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.359217e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.986325e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.536645 sec -INFO: No Floating Point Exceptions have been reported - 2,216,199,851 cycles # 2.870 GHz - 3,159,776,582 instructions # 1.43 insn per cycle - 0.831121874 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.959257e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.482289e+07 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.503835e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 +TOTAL : 0.401016 sec +INFO: No Floating Point Exceptions have been reported + 982,588,415 cycles:u # 2.335 GHz (74.23%) + 2,633,099 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.51%) + 7,514,039 stalled-cycles-backend:u # 0.76% backend cycles idle (74.87%) + 1,557,120,082 instructions:u # 1.58 insn per cycle + # 0.00 stalled cycles per insn (75.65%) + 0.460989527 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.421869e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.505515e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.505515e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.438528 sec -INFO: No Floating Point Exceptions have been reported - 13,015,204,187 cycles # 2.929 GHz - 34,341,759,533 instructions # 2.64 insn per cycle - 4.444441151 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.938860e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.021462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.021462e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.751082 sec +INFO: No Floating Point Exceptions have been reported + 12,709,283,737 cycles:u # 3.378 GHz (74.93%) + 8,556,908 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.93%) + 4,229,942,104 stalled-cycles-backend:u # 33.28% backend cycles idle (74.93%) + 35,277,140,070 instructions:u # 2.78 insn per cycle + # 0.12 stalled cycles per insn (74.95%) + 3.767075266 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 885) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.982901e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.119934e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.119934e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.624625 sec -INFO: No Floating Point Exceptions have been reported - 10,679,803,279 cycles # 2.942 GHz - 24,245,188,333 instructions # 2.27 insn per cycle - 3.630600501 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2610) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.584654e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.803796e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.803796e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.471773 sec +INFO: No Floating Point Exceptions have been reported + 8,280,641,404 cycles:u # 3.335 GHz (74.90%) + 9,308,773 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.90%) + 1,538,273,950 stalled-cycles-backend:u # 18.58% backend cycles idle (74.91%) + 21,677,236,050 instructions:u # 2.62 insn per cycle + # 0.07 stalled cycles per insn (75.08%) + 2.545937944 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2458) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.555816e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.876140e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.876140e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.410266 sec -INFO: No Floating Point Exceptions have been reported - 6,676,895,845 cycles # 2.765 GHz - 12,404,391,789 instructions # 1.86 insn per cycle - 2.415872101 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3115) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.688766e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.149893e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.149893e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.751078 sec +INFO: No Floating Point Exceptions have been reported + 5,754,161,838 cycles:u # 3.264 GHz (74.89%) + 8,027,521 stalled-cycles-frontend:u # 0.14% frontend cycles idle (75.04%) + 1,733,341,378 stalled-cycles-backend:u # 30.12% backend cycles idle (75.04%) + 12,004,181,265 instructions:u # 2.09 insn per cycle + # 0.14 stalled cycles per insn (75.04%) + 1.767279218 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3012) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.932497e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.306284e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.306284e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.233356 sec -INFO: No Floating Point 
Exceptions have been reported - 6,172,218,152 cycles # 2.758 GHz - 11,544,853,425 instructions # 1.87 insn per cycle - 2.239017897 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2644) (512y: 239) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.760390e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.970863e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.970863e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.895519 sec -INFO: No Floating Point Exceptions have been reported - 5,386,476,820 cycles # 1.857 GHz - 9,291,001,680 instructions # 1.72 insn per cycle - 2.901312030 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2099) (512y: 282) (512z: 1958) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index af0c8fa098..5f12e2193a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_17:57:01 -DATE: 2024-09-18_12:44:17 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.294016e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.195619e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.822974e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.537359 sec -INFO: No Floating Point Exceptions have been reported - 2,212,895,393 cycles # 2.861 GHz - 3,167,520,059 instructions # 1.43 insn per cycle - 0.832101772 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.981203e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.539251e+07 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.562219e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 +TOTAL : 0.393786 sec +INFO: No Floating Point Exceptions have been reported + 981,751,203 cycles:u # 2.364 GHz (75.65%) + 2,492,719 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.20%) + 6,301,960 stalled-cycles-backend:u # 0.64% backend cycles idle (76.56%) + 1,593,527,761 instructions:u # 1.62 insn per cycle + # 0.00 stalled cycles per insn (74.94%) + 0.457304440 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.565164e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.657330e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.657330e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.194388 sec -INFO: No Floating Point Exceptions have been reported - 12,320,787,698 cycles # 2.934 GHz - 34,912,998,062 instructions # 2.83 insn per cycle - 4.200192046 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 430) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.634816e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.764509e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.764509e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.066427 sec +INFO: No Floating Point Exceptions have been reported + 10,350,634,816 cycles:u # 3.363 GHz (74.92%) + 9,207,673 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.04%) + 15,199,377 stalled-cycles-backend:u # 0.15% backend cycles idle (75.05%) + 34,607,039,252 instructions:u # 3.34 insn per cycle + # 0.00 stalled cycles per insn (75.05%) + 3.083652986 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 408) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.989812e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.127480e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.127480e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.616035 sec -INFO: No Floating Point Exceptions have been reported - 10,626,604,482 cycles # 2.935 GHz - 23,338,496,545 instructions # 2.20 insn per cycle - 3.621790672 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2378) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.984118e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.247895e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.247895e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.287179 sec +INFO: No Floating Point Exceptions have been reported + 7,639,182,299 cycles:u # 3.324 GHz (75.02%) + 9,475,513 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.95%) + 1,914,637,161 stalled-cycles-backend:u # 25.06% backend cycles idle (74.94%) + 21,134,965,478 instructions:u # 2.77 insn per cycle + # 0.09 stalled cycles per insn (74.97%) + 2.303285175 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.054894e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.447738e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.447738e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.181056 sec -INFO: No Floating Point Exceptions have been reported - 6,051,059,717 cycles # 2.768 GHz - 11,860,809,289 instructions # 1.96 insn per cycle - 2.186772408 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2468) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.310818e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.845191e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.845191e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.615040 sec +INFO: No Floating Point Exceptions have been reported + 5,309,461,499 cycles:u # 3.264 GHz (74.72%) + 8,747,447 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.92%) + 1,032,040,214 stalled-cycles-backend:u # 19.44% backend cycles idle (74.96%) + 11,420,261,490 instructions:u # 2.15 insn per cycle + # 0.09 stalled cycles per insn (74.96%) + 1.631528490 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2332) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.028106e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.414371e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.414371e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.192869 sec -INFO: No Floating Point 
Exceptions have been reported - 6,064,121,206 cycles # 2.759 GHz - 11,098,432,522 instructions # 1.83 insn per cycle - 2.198761953 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2098) (512y: 174) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.876416e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.107845e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.107845e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.813537 sec -INFO: No Floating Point Exceptions have been reported - 5,237,838,464 cycles # 1.858 GHz - 9,015,066,552 instructions # 1.72 insn per cycle - 2.819357375 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1632) (512y: 208) (512z: 1567) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 0cce370026..843b1434d8 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_17:13:48 -DATE: 2024-09-18_12:12:25 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.285654e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.744544e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.855248e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.489810 sec -INFO: No Floating Point Exceptions have been reported - 2,058,086,051 cycles # 2.871 GHz - 2,937,778,801 instructions # 1.43 insn per cycle - 0.774500335 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 6.003234e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.159945e+08 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.183678e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 +TOTAL : 0.343481 sec +INFO: No Floating Point Exceptions have been reported + 800,538,383 cycles:u # 2.218 GHz (73.03%) + 2,346,963 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.25%) + 6,715,861 stalled-cycles-backend:u # 0.84% backend cycles idle (74.73%) + 1,500,212,452 instructions:u # 1.87 insn per cycle + # 0.00 stalled cycles per insn (74.46%) + 0.497087625 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.924099e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.978298e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.978298e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.535123 sec -INFO: No Floating Point Exceptions have been reported - 16,260,554,497 cycles # 2.935 GHz - 45,332,637,380 instructions # 2.79 insn per cycle - 5.540566072 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.920924e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.010747e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.010747e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.736629 sec +INFO: No Floating Point Exceptions have been reported + 12,772,799,800 cycles:u # 3.410 GHz (74.97%) + 6,438,869 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.02%) + 38,957,089 stalled-cycles-backend:u # 0.31% backend cycles idle (75.02%) + 45,402,799,814 instructions:u # 3.55 insn per cycle + # 0.00 stalled cycles per insn (75.02%) + 3.842668704 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 +Avg ME (F77/C++) = 2.0288198337657377 +Relative difference = 8.193642726087208e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.537932e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.874791e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.874791e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.397466 sec -INFO: No Floating Point Exceptions have been reported - 7,088,165,806 cycles # 2.951 GHz - 17,790,594,363 instructions # 2.51 insn per cycle - 2.403188687 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.004887e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.369552e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.369552e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.893575 sec +INFO: No Floating Point Exceptions have been reported + 6,329,966,260 cycles:u # 3.327 GHz (74.89%) + 5,942,225 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.85%) + 2,806,250,644 stalled-cycles-backend:u # 44.33% backend cycles idle (74.81%) + 17,176,219,337 instructions:u # 2.71 insn per cycle + # 0.16 stalled cycles per insn (74.92%) + 2.089375786 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198775378987 +Relative difference = 6.036124513188701e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.392634e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.540507e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.540507e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.334609 sec -INFO: No Floating Point Exceptions have been reported - 3,736,094,091 cycles # 2.789 GHz - 8,261,313,611 instructions # 2.21 insn per cycle - 1.340132908 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.176093e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.317775e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.317775e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.037790 sec +INFO: No Floating Point Exceptions have been reported + 3,374,015,291 cycles:u # 3.224 GHz (75.06%) + 6,964,114 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.84%) + 1,080,311,721 stalled-cycles-backend:u # 32.02% backend cycles idle (74.78%) + 8,107,806,529 instructions:u # 2.40 insn per cycle + # 0.13 stalled cycles per insn (74.84%) + 1.139350959 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186282850802 +Relative difference = 1.8321738890139266e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.862239e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.012505e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.012505e+06 
) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.268798 sec -INFO: No Floating Point Exceptions have been reported - 3,543,869,427 cycles # 2.783 GHz - 7,911,503,214 instructions # 2.23 insn per cycle - 1.274261347 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.491068e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.141806e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.141806e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.701721 sec -INFO: No Floating Point Exceptions have been reported - 3,270,419,298 cycles # 1.917 GHz - 6,095,745,028 instructions # 1.86 insn per cycle - 1.707211646 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 5e7502fc17..bcb4d19bce 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,77 +1,54 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_18:06:32 -DATE: 2024-09-18_12:53:26 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.022210e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.414163e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.414163e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.683925 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,644,974,332 cycles # 2.886 GHz - 4,089,078,726 instructions # 1.55 insn per cycle - 0.974029218 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 7.807509e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.983639e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.983639e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 +TOTAL : 1.162866 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,552,364,031 cycles:u # 2.994 GHz (74.52%) + 37,548,960 stalled-cycles-frontend:u # 1.06% frontend cycles idle (75.08%) + 1,127,077,852 stalled-cycles-backend:u # 31.73% backend cycles idle (74.93%) + 3,817,045,743 instructions:u # 1.07 insn per cycle + # 0.30 stalled cycles per insn (75.31%) + 1.224889095 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -79,35 +56,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.927229e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.981708e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.981708e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.572760 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 16,435,796,229 cycles # 2.946 GHz - 45,376,812,282 instructions # 2.76 insn per cycle - 5.580128034 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.950552e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.034859e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.034859e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.742410 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 12,696,850,045 cycles:u # 3.381 GHz (74.90%) + 6,935,878 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.98%) + 28,501,944 stalled-cycles-backend:u # 0.22% backend cycles idle (75.08%) + 45,502,459,168 instructions:u # 3.58 insn per cycle + # 0.00 stalled cycles per insn (75.08%) + 3.837658112 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -115,33 +93,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 +Avg ME (F77/C++) = 2.0288198337657377 +Relative difference = 8.193642726087208e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.483217e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.814609e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.814609e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.475211 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 7,297,746,086 cycles # 2.941 GHz - 18,073,033,530 instructions # 2.48 insn per cycle - 2.482430942 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.203067e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.592213e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.592213e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.883234 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,265,430,020 cycles:u # 3.305 GHz (74.76%) + 6,791,933 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.83%) + 2,590,936,624 stalled-cycles-backend:u # 41.35% backend cycles idle (75.04%) + 17,234,116,973 instructions:u # 2.75 insn per cycle + # 0.15 stalled cycles per insn (75.10%) + 1.900512135 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -149,33 +130,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198775378987 +Relative difference = 6.036124513188701e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.199525e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.300829e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.300829e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.415446 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,953,896,804 cycles # 2.781 GHz - 8,500,905,843 instructions # 2.15 insn per cycle - 1.422523843 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.171349e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.310974e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.310974e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.085681 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,491,792,899 cycles:u # 3.179 GHz (74.52%) + 7,299,229 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.85%) + 1,090,281,496 stalled-cycles-backend:u # 31.22% backend cycles idle (75.21%) + 8,272,960,237 instructions:u # 2.37 insn per cycle + # 0.13 stalled cycles per insn (75.24%) + 1.103698634 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -183,80 +167,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186282850802 +Relative difference = 1.8321738890139266e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.608107e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.919736e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.919736e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.364816 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,828,677,143 cycles # 2.793 GHz - 8,155,232,689 instructions # 2.13 insn per cycle - 1.371531073 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.398900e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.033073e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.033073e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.777483 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,485,580,348 cycles # 1.954 GHz - 6,352,386,091 instructions # 1.82 insn per cycle - 1.784705241 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 7b3bdcf221..ae32bb5481 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_18:11:56 -DATE: 2024-09-18_13:05:38 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.256953e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.707995e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.827629e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.581626 sec -INFO: No Floating Point Exceptions have been reported - 2,320,591,922 cycles # 2.873 GHz - 3,370,044,879 instructions # 1.45 insn per cycle - 0.865525838 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.809931e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.224979e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.251448e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.080340e+00 +- 3.470037e-03 ) GeV^0 +TOTAL : 1.024141 sec +INFO: No Floating Point Exceptions have been reported + 3,125,129,101 cycles:u # 2.998 GHz (74.69%) + 27,700,211 stalled-cycles-frontend:u # 0.89% frontend cycles idle (74.76%) + 1,124,092,602 stalled-cycles-backend:u # 35.97% backend cycles idle (74.74%) + 2,948,330,703 instructions:u # 0.94 insn per cycle + # 0.38 stalled cycles per insn (74.80%) + 1.077623925 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.923451e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.977569e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.977569e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.937740e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.020684e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.020684e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 5.596105 sec -INFO: No Floating Point Exceptions have been reported - 16,423,082,806 cycles # 2.932 GHz - 45,361,162,230 instructions # 2.76 insn per cycle - 5.601871750 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.718315 sec +INFO: No Floating Point Exceptions have been reported + 12,679,943,630 cycles:u # 3.404 GHz (74.81%) + 7,147,017 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) + 16,502,581 stalled-cycles-backend:u # 0.13% backend cycles idle (75.09%) + 45,503,865,325 instructions:u # 3.59 insn per cycle + # 0.00 stalled cycles per insn (75.09%) + 3.727751012 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 +Avg ME (F77/C++) = 2.0288198337657377 +Relative difference = 8.193642726087208e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.510624e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.845954e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.845954e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.467999 sec -INFO: No Floating Point Exceptions have been reported - 7,259,263,758 cycles # 2.936 GHz - 17,804,964,488 instructions # 2.45 insn per cycle - 2.473643333 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.012329e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.382116e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.382116e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.890925 sec +INFO: No Floating Point Exceptions have been reported + 6,370,824,183 cycles:u # 3.357 GHz (74.61%) + 5,952,086 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.99%) + 2,785,178,986 stalled-cycles-backend:u # 43.72% backend cycles idle (75.13%) + 17,103,920,086 instructions:u # 2.68 insn per cycle + # 0.16 stalled cycles per insn (75.13%) + 1.899977692 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198775378987 +Relative difference = 6.036124513188701e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.271097e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.411462e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.411462e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.412437 sec -INFO: No Floating Point Exceptions have been reported - 3,908,301,423 cycles # 2.757 GHz - 8,246,550,739 instructions # 2.11 insn per cycle - 1.418307229 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.186993e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.330461e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.330461e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.031162 sec +INFO: No Floating Point Exceptions have been reported + 3,393,358,695 cycles:u # 3.269 GHz (74.26%) + 7,282,823 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.43%) + 1,075,923,160 stalled-cycles-backend:u # 31.71% backend cycles idle (75.14%) + 8,043,926,213 instructions:u # 2.37 insn per cycle + # 0.13 stalled cycles per insn (75.35%) + 1.040189831 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186282850802 +Relative difference = 1.8321738890139266e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.660773e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.926852e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
9.926852e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.356729 sec -INFO: No Floating Point Exceptions have been reported - 3,755,878,291 cycles # 2.759 GHz - 7,864,539,547 instructions # 2.09 insn per cycle - 1.362169016 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.442823e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.089629e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.089629e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.773182 sec -INFO: No Floating Point Exceptions have been reported - 3,435,797,893 cycles # 1.932 GHz - 6,046,565,657 instructions # 1.76 insn per cycle - 1.778888357 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 8649c65a6a..47959b4036 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,70 +1,50 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_18:10:07 -DATE: 2024-09-18_13:00:00 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.491816e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.706264e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.829121e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.638763 sec -INFO: No Floating Point Exceptions have been reported - 2,486,309,752 cycles # 2.846 GHz - 3,832,853,586 instructions # 1.54 insn per cycle - 0.933212094 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 8.618825e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.211694e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.237729e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 +TOTAL : 1.124995 sec +INFO: No Floating Point Exceptions have been reported + 3,476,336,954 cycles:u # 3.026 GHz (74.85%) + 36,627,328 stalled-cycles-frontend:u # 1.05% frontend cycles idle (74.54%) + 1,114,403,962 stalled-cycles-backend:u # 32.06% backend cycles idle (74.63%) + 3,788,721,035 instructions:u # 1.09 insn per cycle + # 0.29 stalled cycles per insn (74.82%) + 1.181447151 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -72,33 +52,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.926255e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.980755e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.980755e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.530359 sec -INFO: No Floating Point Exceptions have been reported - 16,260,744,493 cycles # 2.938 GHz - 45,331,881,354 instructions # 2.79 insn per cycle - 5.536242796 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.970848e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.055504e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.055504e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.676706 sec +INFO: No Floating Point Exceptions have been reported + 12,570,831,222 cycles:u # 3.412 GHz (75.03%) + 6,771,135 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.03%) + 11,872,257 stalled-cycles-backend:u # 0.09% backend cycles idle (75.03%) + 45,451,149,469 instructions:u # 3.62 insn per cycle + # 0.00 stalled cycles per insn (75.03%) + 3.686013589 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -106,31 +87,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 +Avg ME (F77/C++) = 2.0288198337657377 +Relative difference = 8.193642726087208e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.514119e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.847574e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.847574e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.409823 sec -INFO: No Floating Point Exceptions have been reported - 7,091,224,967 cycles # 2.937 GHz - 17,790,807,442 instructions # 2.51 insn per cycle - 2.415653910 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.021461e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.391294e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.391294e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.888421 sec +INFO: No Floating Point Exceptions have been reported + 6,350,127,223 cycles:u # 3.350 GHz (75.00%) + 6,579,219 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.10%) + 2,776,635,796 stalled-cycles-backend:u # 43.73% backend cycles idle (75.10%) + 17,072,514,219 instructions:u # 2.69 insn per cycle + # 0.16 stalled cycles per insn (75.10%) + 1.897472156 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -138,31 +122,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198775378987 +Relative difference = 6.036124513188701e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.315319e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.466327e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.466327e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.347295 sec -INFO: No Floating Point Exceptions have been reported - 3,748,135,716 cycles # 2.771 GHz - 8,261,548,625 instructions # 2.20 insn per cycle - 1.353086220 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.189325e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.334414e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.334414e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.031970 sec +INFO: No Floating Point Exceptions have been reported + 3,372,633,035 cycles:u # 3.246 GHz (74.65%) + 7,160,636 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.62%) + 1,078,144,489 stalled-cycles-backend:u # 31.97% backend cycles idle (74.82%) + 8,046,994,912 instructions:u # 2.39 insn per cycle + # 0.13 stalled cycles per insn (75.24%) + 1.040907882 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -170,76 +157,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186282850802 +Relative difference = 1.8321738890139266e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.772831e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.005617e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.005617e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.281889 sec -INFO: No Floating Point Exceptions have been reported - 3,559,044,656 cycles # 2.766 GHz - 7,911,466,674 instructions # 2.22 insn per cycle - 1.287610992 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.412498e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.103906e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.103906e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.722151 sec -INFO: No Floating Point Exceptions have been reported - 3,304,024,823 cycles # 1.914 GHz - 6,099,911,719 instructions # 1.85 insn per cycle - 1.727529111 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index fbbd4d7aad..2513d26a17 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_17:13:59 -DATE: 2024-09-18_12:12:45 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.269829e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.739721e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.856627e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.490633 sec -INFO: No Floating Point Exceptions have been reported - 2,054,923,204 cycles # 2.868 GHz - 2,821,409,154 instructions # 1.37 insn per cycle - 0.774891828 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.843140e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.140200e+08 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.163746e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 +TOTAL : 0.345480 sec +INFO: No Floating Point Exceptions have been reported + 769,099,133 cycles:u # 2.131 GHz (74.97%) + 2,311,642 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.67%) + 12,497,784 stalled-cycles-backend:u # 1.62% backend cycles idle (75.45%) + 1,511,553,566 instructions:u # 1.97 insn per cycle + # 0.01 stalled cycles per insn (74.94%) + 0.492189685 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.972061e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.029032e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.029032e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.402943 sec -INFO: No Floating Point Exceptions have been reported - 15,938,200,378 cycles # 2.947 GHz - 44,441,419,092 instructions # 2.79 insn per cycle - 5.408620560 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 536) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.965127e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.051164e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.051164e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.685667 sec +INFO: No Floating Point Exceptions have been reported + 12,497,981,355 cycles:u # 3.383 GHz (74.99%) + 6,231,610 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.93%) + 1,604,793,472 stalled-cycles-backend:u # 12.84% backend cycles idle (74.90%) + 44,383,694,468 instructions:u # 3.55 insn per cycle + # 0.04 stalled cycles per insn (74.95%) + 3.738273158 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 +Avg ME (F77/C++) = 2.0288198337657377 +Relative difference = 8.193642726087208e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.316687e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.790003e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.790003e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.058425 sec -INFO: No Floating Point Exceptions have been reported - 6,073,730,384 cycles # 2.944 GHz - 17,080,831,031 instructions # 2.81 insn per cycle - 2.063919735 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2863) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.443597e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.024448e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.024448e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.558930 sec +INFO: No Floating Point Exceptions have been reported + 5,185,372,749 cycles:u # 3.308 GHz (75.01%) + 6,775,953 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.00%) + 1,487,280,863 stalled-cycles-backend:u # 28.68% backend cycles idle (75.00%) + 16,897,152,513 instructions:u # 3.26 insn per cycle + # 0.09 stalled cycles per insn (75.00%) + 1.626354383 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2753) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198775378987 +Relative difference = 6.036124513188701e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.040290e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.607212e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.607212e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.821129 sec -INFO: No Floating Point Exceptions have been reported - 5,028,060,974 cycles # 2.754 GHz - 10,226,327,467 instructions # 2.03 insn per cycle - 1.826739648 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3907) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.732403e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.488075e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.488075e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.348095 sec +INFO: No Floating Point Exceptions have been reported + 4,434,654,710 cycles:u # 3.269 GHz (74.70%) + 6,657,210 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.88%) + 1,733,238,057 stalled-cycles-backend:u # 39.08% backend cycles idle (75.17%) + 10,216,913,126 instructions:u # 2.30 insn per cycle + # 0.17 stalled cycles per insn (75.24%) + 1.426007773 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3885) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186282850802 +Relative difference = 1.8321738890139266e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.109926e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.690770e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.690770e+05 
) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.802334 sec -INFO: No Floating Point Exceptions have been reported - 4,967,999,007 cycles # 2.749 GHz - 9,996,248,012 instructions # 2.01 insn per cycle - 1.807786513 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3806) (512y: 2) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.589191e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.908384e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.908384e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.373161 sec -INFO: No Floating Point Exceptions have been reported - 4,379,373,712 cycles # 1.842 GHz - 8,445,292,719 instructions # 1.93 insn per cycle - 2.379096717 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2746) (512y: 4) (512z: 2754) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index b94de9fae6..118dd06c8f 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_17:57:11 -DATE: 2024-09-18_12:44:41 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.109159e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.754036e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.870271e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.491488 sec -INFO: No Floating Point Exceptions have been reported - 2,077,663,912 cycles # 2.873 GHz - 2,918,599,943 instructions # 1.40 insn per cycle - 0.780254295 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.932103e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.175689e+08 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.200034e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 +TOTAL : 0.336331 sec +INFO: No Floating Point Exceptions have been reported + 843,912,844 cycles:u # 2.404 GHz (72.87%) + 2,445,193 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.96%) + 7,612,619 stalled-cycles-backend:u # 0.90% backend cycles idle (76.93%) + 1,427,522,026 instructions:u # 1.69 insn per cycle + # 0.01 stalled cycles per insn (75.89%) + 0.392680618 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] 
[hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.511694e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.603843e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.603843e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.261367 sec -INFO: No Floating Point Exceptions have been reported - 12,578,636,437 cycles # 2.949 GHz - 34,608,642,396 instructions # 2.75 insn per cycle - 4.266948834 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.694570e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.826993e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.826993e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 2.986758 sec +INFO: No Floating Point Exceptions have been reported + 10,133,396,715 cycles:u # 3.384 GHz (74.90%) + 6,350,756 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.78%) + 1,086,346,025 stalled-cycles-backend:u # 10.72% backend cycles idle (74.84%) + 34,556,648,168 instructions:u # 3.41 insn per cycle + # 0.03 stalled cycles per insn (75.09%) + 2.999304468 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 762) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199094356969 -Relative difference = 4.463890496342449e-08 +Avg ME (F77/C++) = 2.0288199088536203 +Relative difference = 4.4925808981097166e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.245729e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.707902e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.707902e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.085904 sec -INFO: No Floating Point Exceptions have been reported - 6,139,006,311 cycles # 2.936 GHz - 14,814,345,795 instructions # 2.41 insn per cycle - 2.091585873 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2975) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.421806e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.989977e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.989977e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.558597 sec +INFO: No Floating Point Exceptions have been reported + 5,216,610,830 cycles:u # 3.331 GHz (75.02%) + 6,672,763 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.97%) + 1,881,496,588 stalled-cycles-backend:u # 36.07% backend cycles idle (74.97%) + 14,512,314,160 instructions:u # 2.78 insn per cycle + # 0.13 stalled cycles per insn (74.97%) + 1.570709178 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2947) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193755550310 -Relative difference = 1.8511017053446366e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198769558221 +Relative difference = 6.06481491495597e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.217326e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.053698e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.053698e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.539150 sec -INFO: No Floating Point Exceptions have been reported - 4,266,849,527 cycles # 2.764 GHz - 9,068,527,132 instructions # 2.13 insn per cycle - 1.544604329 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4456) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.666171e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.059188e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059188e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.230905 sec +INFO: No Floating Point Exceptions have been reported + 4,071,672,464 cycles:u # 3.288 GHz (74.81%) + 7,713,984 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.81%) + 1,584,620,055 stalled-cycles-backend:u # 38.92% backend cycles idle (74.87%) + 8,963,815,100 instructions:u # 2.20 insn per cycle + # 0.18 stalled cycles per insn (74.87%) + 1.243036539 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4429) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182069780305 -Relative difference = 1.0201902325125583e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186736870557 +Relative difference = 1.6083886449260875e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.341390e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.190395e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
8.190395e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.513835 sec -INFO: No Floating Point Exceptions have been reported - 4,209,677,652 cycles # 2.772 GHz - 8,658,962,407 instructions # 2.06 insn per cycle - 1.519314933 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4233) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182069780305 -Relative difference = 1.0201902325125583e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.363197e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.802509e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.802509e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.041800 sec -INFO: No Floating Point Exceptions have been reported - 3,848,539,052 cycles # 1.880 GHz - 7,805,686,420 instructions # 2.03 insn per cycle - 2.047559874 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4273) (512y: 0) (512z: 2558) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183246739209 -Relative difference = 1.6003107281264138e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index 647db6d470..b7d1150cb7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_17:57:21 -DATE: 2024-09-18_12:45:01 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.181121e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.754734e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.875454e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.493098 sec -INFO: No Floating Point Exceptions have been reported - 2,068,560,161 cycles # 2.866 GHz - 2,913,404,401 instructions # 1.41 insn per cycle - 0.778994585 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.869011e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.134218e+08 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.157950e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 +TOTAL : 0.339311 sec +INFO: No Floating Point Exceptions have been reported + 825,835,247 cycles:u # 2.323 GHz (75.40%) + 2,448,227 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.37%) + 13,017,603 stalled-cycles-backend:u # 1.58% backend cycles idle (75.25%) + 1,457,584,553 instructions:u # 1.76 insn per cycle + # 0.01 stalled cycles per insn (76.31%) + 0.400194249 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] 
[hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.673571e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.779018e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.779018e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.007457 sec -INFO: No Floating Point Exceptions have been reported - 11,821,622,506 cycles # 2.947 GHz - 35,077,213,703 instructions # 2.97 insn per cycle - 4.012923546 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.954979e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.108161e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.108161e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 2.797007 sec +INFO: No Floating Point Exceptions have been reported + 9,501,070,094 cycles:u # 3.388 GHz (74.92%) + 6,219,947 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.90%) + 9,308,030 stalled-cycles-backend:u # 0.10% backend cycles idle (74.92%) + 34,666,442,750 instructions:u # 3.65 insn per cycle + # 0.00 stalled cycles per insn (74.92%) + 2.809775897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 434) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED 
] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199094356969 -Relative difference = 4.463890496342449e-08 +Avg ME (F77/C++) = 2.0288199088536203 +Relative difference = 4.4925808981097166e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision 
= FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.446071e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.947640e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.947640e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.011399 sec -INFO: No Floating Point Exceptions have been reported - 5,918,531,500 cycles # 2.935 GHz - 14,532,054,201 instructions # 2.46 insn per cycle - 2.017166521 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2569) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.127608e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.834460e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.834460e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.435085 sec +INFO: No Floating Point Exceptions have been reported + 4,759,046,807 cycles:u # 3.300 GHz (75.05%) + 7,247,357 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.04%) + 1,159,148,260 stalled-cycles-backend:u # 24.36% backend cycles idle (75.04%) + 13,935,870,455 instructions:u # 2.93 insn per cycle + # 0.08 stalled cycles per insn (75.05%) + 1.446829797 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2467) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193583255634 -Relative difference = 1.7661780742548925e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198892958462 +Relative difference = 5.4565783974899003e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.388337e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.293979e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.293979e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.508049 sec -INFO: No Floating Point Exceptions have been reported - 4,192,067,529 cycles # 2.771 GHz - 8,850,538,175 instructions # 2.11 insn per cycle - 1.513555792 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3552) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.020322e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.125599e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.125599e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.174617 sec +INFO: No Floating Point Exceptions have been reported + 3,873,061,459 cycles:u # 3.276 GHz (74.86%) + 7,352,600 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.65%) + 1,416,471,421 stalled-cycles-backend:u # 36.57% backend cycles idle (74.96%) + 8,561,369,225 instructions:u # 2.21 insn per cycle + # 0.17 stalled cycles per insn (74.96%) + 1.186707415 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3397) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182107033208 -Relative difference = 1.0385521077446488e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186836987734 +Relative difference = 1.559041129563128e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.539896e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.448863e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.448863e+05 
) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.476824 sec -INFO: No Floating Point Exceptions have been reported - 4,124,218,335 cycles # 2.783 GHz - 8,408,510,612 instructions # 2.04 insn per cycle - 1.482399691 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3296) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182107033208 -Relative difference = 1.0385521077446488e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.510377e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.974414e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.974414e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.989253 sec -INFO: No Floating Point Exceptions have been reported - 3,785,582,278 cycles # 1.899 GHz - 7,698,584,647 instructions # 2.03 insn per cycle - 1.994773359 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 2110) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183204829693 -Relative difference = 1.5796536184903122e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index ac99bf7b60..6faae54f7c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_17:14:10 -DATE: 2024-09-18_12:13:07 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.415407e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.358342e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.002564e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.531223 sec -INFO: No Floating Point Exceptions have been reported - 2,212,715,399 cycles # 2.883 GHz - 3,174,354,481 instructions # 1.43 insn per cycle - 0.824625337 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.932877e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.456336e+07 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.477647e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 +TOTAL : 0.430186 sec +INFO: No Floating Point Exceptions have been reported + 1,011,551,860 cycles:u # 2.308 GHz (75.93%) + 2,474,973 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.46%) + 5,574,299 stalled-cycles-backend:u # 0.55% backend cycles idle (74.50%) + 1,495,779,097 instructions:u # 1.48 insn per cycle + # 0.00 stalled cycles per insn (74.63%) + 0.628695657 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243874 -Relative difference = 3.241686432649386e-07 +Avg ME (F77/GPU) = 2.0288063423243869 +Relative difference = 3.241686434838304e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.812224e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.858502e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.858502e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.893700 sec -INFO: No Floating Point Exceptions have been reported - 17,384,515,155 cycles # 2.947 GHz - 46,085,827,160 instructions # 2.65 insn per cycle - 5.899425018 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.516895e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.579376e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.579376e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.354211 sec +INFO: No Floating Point Exceptions have been reported + 14,455,072,921 cycles:u # 3.309 GHz (75.00%) + 8,798,887 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) + 3,863,259,247 stalled-cycles-backend:u # 26.73% backend cycles idle (74.91%) + 45,859,438,536 instructions:u # 3.17 insn per cycle + # 0.08 stalled cycles per insn (74.95%) + 4.430543616 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.230178e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.393068e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.393068e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.353996 sec -INFO: No Floating Point Exceptions have been reported - 9,906,776,741 cycles # 2.949 GHz - 27,581,204,322 instructions # 2.78 insn per cycle - 3.359750594 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.234560e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.423000e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.423000e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.661992 sec +INFO: No Floating Point Exceptions have been reported + 8,892,538,145 cycles:u # 3.324 GHz (74.91%) + 9,095,708 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.88%) + 2,749,417,047 stalled-cycles-backend:u # 30.92% backend cycles idle (75.03%) + 27,514,557,439 instructions:u # 3.09 insn per cycle + # 0.10 stalled cycles per insn (75.17%) + 2.736935695 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2518) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.070803e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.467527e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.467527e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.175848 sec -INFO: No Floating Point Exceptions have been reported - 6,033,401,789 cycles # 2.767 GHz - 12,481,778,172 instructions # 2.07 insn per cycle - 2.181604261 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2773) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.387440e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.936802e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.936802e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.603726 sec +INFO: No Floating Point Exceptions have been reported + 5,224,190,372 cycles:u # 3.230 GHz (75.17%) + 9,719,986 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.12%) + 942,360,412 stalled-cycles-backend:u # 18.04% backend cycles idle (74.72%) + 12,352,759,131 instructions:u # 2.36 insn per cycle + # 0.08 stalled cycles per insn (74.60%) + 1.699215859 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2668) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 +Avg ME (F77/C++) = 2.0288063930599014 +Relative difference = 2.9916108265801754e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.576261e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.054850e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.054850e+05 ) sec^-1 -MeanMatrixElemValue = 
( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.987931 sec -INFO: No Floating Point Exceptions have been reported - 5,526,359,959 cycles # 2.773 GHz - 11,919,157,674 instructions # 2.16 insn per cycle - 1.993761374 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2518) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.583667e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.773486e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.773486e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.034227 sec -INFO: No Floating Point Exceptions have been reported - 5,618,120,727 cycles # 1.849 GHz - 8,105,692,593 instructions # 1.44 insn per cycle - 3.040009315 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1862) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index d60a3db604..2e1f89feb6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_17:14:23 -DATE: 2024-09-18_12:13:32 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.391860e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.272095e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.937370e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.534370 sec -INFO: No Floating Point Exceptions have been reported - 2,212,045,639 cycles # 2.882 GHz - 3,154,512,029 instructions # 1.43 insn per cycle - 0.826500836 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.981612e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.541037e+07 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.563162e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 +TOTAL : 0.401933 sec +INFO: No Floating Point Exceptions have been reported + 965,281,867 cycles:u # 2.286 GHz (75.43%) + 2,505,651 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.44%) + 6,141,178 stalled-cycles-backend:u # 0.64% backend cycles idle (74.76%) + 1,610,457,772 instructions:u # 1.67 insn per cycle + # 0.00 stalled cycles per insn (75.15%) + 0.585324236 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243874 -Relative difference = 3.241686432649386e-07 +Avg ME (F77/GPU) = 2.0288063423243869 +Relative difference = 3.241686434838304e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.857330e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.905433e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.905433e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.752139 sec -INFO: No Floating Point Exceptions have been reported - 16,956,103,485 cycles # 2.946 GHz - 45,111,671,387 instructions # 2.66 insn per cycle - 5.757950281 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.566258e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.630147e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.630147e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.271754 sec +INFO: No Floating Point Exceptions have been reported + 14,501,888,548 cycles:u # 3.384 GHz (74.99%) + 8,976,778 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.99%) + 3,364,261,054 stalled-cycles-backend:u # 23.20% backend cycles idle (75.00%) + 44,578,588,606 instructions:u # 3.07 insn per cycle + # 0.08 stalled cycles per insn (75.00%) + 4.362082746 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 590) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.369201e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.545470e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.545470e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.220654 sec -INFO: No Floating Point Exceptions have been reported - 9,518,675,134 cycles # 2.951 GHz - 26,252,301,051 instructions # 2.76 insn per cycle - 3.226704286 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.583365e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.804007e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.804007e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.471505 sec +INFO: No Floating Point Exceptions have been reported + 8,278,088,324 cycles:u # 3.331 GHz (74.92%) + 9,533,878 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.92%) + 1,272,690,434 stalled-cycles-backend:u # 15.37% backend cycles idle (74.89%) + 26,382,503,426 instructions:u # 3.19 insn per cycle + # 0.05 stalled cycles per insn (75.05%) + 2.553064814 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2312) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.516544e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.830416e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.830416e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.429009 sec -INFO: No Floating Point Exceptions have been reported - 6,737,120,781 cycles # 2.769 GHz - 14,029,549,404 instructions # 2.08 insn per cycle - 2.434732608 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2896) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.441479e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.854446e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.854446e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.812364 sec +INFO: No Floating Point Exceptions have been reported + 5,984,800,024 cycles:u # 3.278 GHz (74.93%) + 8,242,587 stalled-cycles-frontend:u # 0.14% frontend cycles idle (75.03%) + 1,798,859,897 stalled-cycles-backend:u # 30.06% backend cycles idle (75.03%) + 13,980,239,803 instructions:u # 2.34 insn per cycle + # 0.13 stalled cycles per insn (75.03%) + 1.897920283 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2871) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 +Avg ME (F77/C++) = 2.0288063930599014 +Relative difference = 2.9916108265801754e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.763106e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.113046e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.113046e+05 ) sec^-1 -MeanMatrixElemValue = 
( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.308488 sec -INFO: No Floating Point Exceptions have been reported - 6,400,709,122 cycles # 2.767 GHz - 13,521,645,446 instructions # 2.11 insn per cycle - 2.314138282 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2535) (512y: 302) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.631126e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.827064e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.827064e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.996339 sec -INFO: No Floating Point Exceptions have been reported - 5,581,413,243 cycles # 1.860 GHz - 9,205,937,992 instructions # 1.65 insn per cycle - 3.002095742 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2060) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index de5eca26a8..ff8a6789a6 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-18_17:14:36 -DATE: 2024-09-18_12:13:57 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.672201e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.887935e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.992853e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.477007 sec -INFO: No Floating Point Exceptions have been reported - 1,987,624,447 cycles # 2.874 GHz - 2,861,967,134 instructions # 1.44 insn per cycle - 0.751704376 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.457022e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.560381e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.562252e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 +TOTAL : 0.563088 sec +INFO: No Floating Point Exceptions have been reported + 1,184,409,252 cycles:u # 2.674 GHz (74.37%) + 2,616,298 stalled-cycles-frontend:u # 0.22% frontend cycles idle (73.85%) + 8,712,145 stalled-cycles-backend:u # 0.74% backend cycles idle (73.91%) + 1,724,638,719 instructions:u # 1.46 insn per cycle + # 0.01 stalled cycles per insn (75.75%) + 0.700799232 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.044656e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.231568e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.242034e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.614374 sec -INFO: No Floating Point Exceptions have been reported - 2,464,089,898 cycles # 2.883 GHz - 3,693,413,015 instructions # 1.50 insn per cycle - 0.914175309 
seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.606899e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.729548e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.732092e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 +TOTAL : 0.720731 sec +INFO: No Floating Point Exceptions have been reported + 2,032,976,036 cycles:u # 2.726 GHz (74.65%) + 2,410,844 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.42%) + 12,000,473 stalled-cycles-backend:u # 0.59% backend cycles idle (74.45%) + 2,481,165,172 instructions:u # 1.22 insn per cycle + # 0.00 stalled cycles per insn (74.92%) + 0.779185094 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 +Avg ME (F77/GPU) = 1.4131213684418644 +Relative difference = 4.469239991780462e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.435389e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.447579e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.447579e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.748690 sec -INFO: No Floating Point Exceptions have been reported - 19,905,580,584 cycles # 2.948 GHz - 59,914,464,179 instructions # 3.01 insn per cycle - 6.753011110 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.353659e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.368809e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.368809e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 4.910166 sec +INFO: No Floating Point Exceptions have been reported + 16,973,945,281 cycles:u # 3.455 GHz (74.93%) + 2,851,139 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.97%) + 3,581,679,834 stalled-cycles-backend:u # 21.10% backend cycles idle (75.05%) + 57,020,220,816 instructions:u # 3.36 insn per cycle + # 0.06 stalled cycles per insn (75.09%) + 4.985447360 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1294) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432429 +Relative difference = 4.4692302371173303e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.605126e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.648126e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.648126e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.577729 sec -INFO: No Floating Point Exceptions have been reported - 10,567,541,735 cycles # 2.951 GHz - 31,084,954,146 instructions # 2.94 insn per cycle - 3.582009862 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.536241e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.593971e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.593971e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.531297 sec +INFO: No Floating Point Exceptions have been reported + 8,802,208,640 cycles:u # 3.472 GHz (74.91%) + 2,137,896 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.02%) + 1,782,641,515 stalled-cycles-backend:u # 20.25% backend cycles idle (75.07%) + 29,959,209,250 instructions:u # 3.40 insn per cycle + # 0.06 stalled cycles per insn (75.07%) + 2.621443926 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4647) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432429 +Relative difference = 4.4692302371173303e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP 
[gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.119843e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.286275e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.286275e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.816628 sec -INFO: No Floating Point Exceptions have been reported - 5,009,875,098 cycles # 2.752 GHz - 11,404,863,740 instructions # 2.28 insn per cycle - 1.820981146 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.319296e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.343516e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.343516e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.273309 sec +INFO: No Floating Point Exceptions have been reported + 4,370,369,702 cycles:u # 3.440 GHz (74.82%) + 2,062,289 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.81%) + 1,128,447,471 stalled-cycles-backend:u # 25.82% backend cycles idle (74.86%) + 11,112,411,564 instructions:u # 2.54 insn per cycle + # 0.10 stalled cycles per insn (75.12%) + 1.369812125 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4251) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.027376e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.048667e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.048667e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.614828 sec -INFO: No Floating Point Exceptions have been reported - 4,447,516,452 cycles # 2.748 GHz - 10,663,621,215 instructions # 2.40 insn per cycle - 1.619180273 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 92) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.153517e+04 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.257338e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.257338e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.312792 sec -INFO: No Floating Point Exceptions have been reported - 4,128,948,366 cycles # 1.783 GHz - 5,970,641,302 instructions # 1.45 insn per cycle - 2.317202499 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1620) (512y: 94) (512z: 3577) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference 
<= 5E-3) ========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 9c43264546..ac5312cb18 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,97 +1,77 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-18_18:06:44 -DATE: 2024-09-18_12:53:47 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.507916e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.178599e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.178599e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.502648 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,068,761,834 cycles # 2.877 GHz - 3,090,755,102 instructions # 1.49 insn per cycle - 0.775689457 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.224846e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.543995e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.543995e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 0.582869 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,765,589,761 cycles:u # 2.931 GHz (75.25%) + 7,035,236 stalled-cycles-frontend:u # 0.40% frontend cycles idle (75.97%) + 261,985,210 stalled-cycles-backend:u # 14.84% backend cycles idle (75.82%) + 2,169,843,448 instructions:u # 1.23 insn per cycle + # 0.12 stalled cycles per insn (74.85%) + 0.635912960 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.673734e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.373672e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.373672e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.833395 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,144,886,808 cycles # 2.895 GHz - 5,022,532,373 instructions # 1.60 insn per cycle - 1.144806482 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.821635e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.643478e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.643478e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.948724e+03 +- 1.840727e+03 ) GeV^-2 +TOTAL : 1.347374 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,073,598,998 cycles:u # 2.950 GHz (75.06%) + 16,322,403 stalled-cycles-frontend:u # 0.40% frontend cycles idle (75.13%) + 834,923,477 stalled-cycles-backend:u # 20.50% backend cycles idle (74.95%) + 4,169,863,141 instructions:u # 1.02 insn per cycle + # 0.20 stalled cycles per insn (75.35%) + 1.419348394 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -99,35 +79,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 +Avg ME (F77/GPU) = 1.4131213684418644 +Relative difference = 4.469239991780462e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge 
OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.430493e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.443016e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.443016e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.770345 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 19,935,799,744 cycles # 2.943 GHz - 59,921,717,219 instructions # 3.01 insn per cycle - 6.775096176 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.296074e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.310817e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.310817e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 4.999365 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 17,306,681,704 cycles:u # 3.460 GHz (74.91%) + 2,853,983 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.93%) + 3,674,045,388 stalled-cycles-backend:u # 21.23% backend cycles idle (75.01%) + 56,907,854,742 instructions:u # 3.29 insn per cycle + # 0.06 stalled cycles per insn (75.05%) + 5.007805214 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1294) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -135,33 +116,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432429 +Relative difference = 4.4692302371173303e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.571029e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.615207e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.615207e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.613337 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,624,808,815 cycles # 2.938 GHz - 31,136,068,452 instructions # 2.93 insn per cycle - 3.618153867 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.510110e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.567162e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.567162e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.544813 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 8,813,377,561 cycles:u # 3.459 GHz (74.89%) + 1,975,694 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.89%) + 1,765,423,443 stalled-cycles-backend:u # 20.03% backend cycles idle (74.90%) + 29,975,613,508 instructions:u # 3.40 insn per cycle + # 0.06 stalled cycles per insn (75.01%) + 2.552904627 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 4647) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -169,33 +153,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432429 +Relative difference = 4.4692302371173303e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.976002e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.144750e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.144750e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.854528 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,122,960,994 cycles # 2.757 GHz - 11,456,752,385 instructions # 2.24 insn per cycle - 1.859209871 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.306142e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.330343e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.330343e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.282549 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,428,624,680 cycles:u # 3.444 GHz (74.75%) + 689,538 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.00%) + 1,170,114,158 stalled-cycles-backend:u # 26.42% backend cycles idle (75.12%) + 
11,121,515,676 instructions:u # 2.51 insn per cycle + # 0.11 stalled cycles per insn (75.12%) + 1.290561471 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4251) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -203,80 +190,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.023623e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.045107e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.045107e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.629549 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,493,284,400 cycles # 2.751 GHz - 10,714,819,935 instructions # 2.38 insn per cycle - 1.634203375 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 92) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.121040e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.229108e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.229108e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.332216 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,174,771,858 cycles # 1.787 GHz - 6,010,349,590 instructions # 1.44 insn per cycle - 2.336931936 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1620) (512y: 94) (512z: 3577) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 8cdcf50b56..f634fe824c 100644 --- 
a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-18_17:14:50 -DATE: 2024-09-18_12:14:23 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.625266e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.900146e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.003656e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.475988 sec -INFO: No Floating Point Exceptions have been reported - 1,977,726,110 cycles # 2.850 GHz - 2,827,901,574 instructions # 1.43 insn per cycle - 0.751347521 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.453569e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.558130e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.560094e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 +TOTAL : 0.443988 sec +INFO: No Floating Point Exceptions have been reported + 1,154,023,417 cycles:u # 2.635 GHz (75.61%) + 2,680,026 stalled-cycles-frontend:u # 0.23% frontend cycles idle (76.04%) + 8,282,079 stalled-cycles-backend:u # 0.72% backend cycles idle (75.44%) + 1,744,767,704 instructions:u # 1.51 insn per cycle + # 0.00 stalled cycles per insn (74.67%) + 0.573796584 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.046576e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.234116e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.244538e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.611168 sec -INFO: No Floating Point Exceptions have been reported - 2,454,142,578 cycles # 2.886 GHz - 3,695,001,143 instructions # 1.51 insn per cycle - 0.909771724 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.623090e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.749531e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.752290e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 +TOTAL : 0.731632 sec +INFO: No Floating Point Exceptions have been reported + 2,026,109,757 cycles:u # 2.737 GHz (74.27%) + 2,453,713 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.47%) + 7,820,498 stalled-cycles-backend:u # 0.39% backend cycles idle (75.65%) + 2,448,337,363 instructions:u # 1.21 insn per cycle + # 0.00 stalled cycles per insn (75.70%) + 0.794710009 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 +Avg ME (F77/GPU) = 1.4131213684418644 +Relative difference = 4.469239991780462e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.436838e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.448877e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.448877e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.744113 sec -INFO: No Floating Point Exceptions have been reported - 19,898,434,725 cycles # 2.949 GHz - 60,128,447,647 instructions # 3.02 insn per cycle - 6.748351399 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1322) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.496489e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.513469e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.513469e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 4.710710 sec +INFO: No Floating Point Exceptions have been reported + 16,372,965,323 cycles:u # 3.473 GHz (74.86%) + 2,465,696 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.95%) + 3,842,922,790 stalled-cycles-backend:u # 23.47% backend cycles idle (75.03%) + 56,492,878,028 instructions:u # 3.45 insn per cycle + # 0.07 stalled cycles per insn (75.06%) + 4.821352012 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 924) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432427 +Relative difference = 4.4692302386886357e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.649169e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.692956e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.692956e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.544209 sec -INFO: No Floating Point Exceptions have been reported - 10,481,283,758 cycles # 2.954 GHz - 30,686,827,574 instructions # 2.93 insn per cycle - 3.548515404 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5047) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.246723e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.299838e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.299838e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.647063 sec +INFO: No Floating Point Exceptions have been reported + 9,148,950,670 cycles:u # 3.451 GHz (75.10%) + 2,224,960 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.15%) + 2,631,107,871 stalled-cycles-backend:u # 28.76% backend cycles idle (75.03%) + 30,439,427,557 instructions:u # 3.33 insn per cycle + # 0.09 stalled cycles per insn (74.96%) + 2.794627265 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4697) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432431 +Relative difference = 4.4692302355460254e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.897572e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.058943e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.058943e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.861619 sec -INFO: No Floating Point Exceptions have been reported - 5,141,047,361 cycles # 2.756 GHz - 11,838,355,420 instructions # 2.30 insn per cycle - 1.866119668 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4746) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.211077e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.231611e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.231611e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.377740 sec +INFO: No Floating Point Exceptions have been reported + 4,740,821,780 cycles:u # 3.433 GHz (74.78%) + 2,040,865 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.02%) + 1,475,086,457 stalled-cycles-backend:u # 31.11% backend cycles idle (75.10%) + 11,727,325,539 instructions:u # 2.47 insn per cycle + # 0.13 stalled cycles per insn (75.10%) + 1.453655303 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 
4465) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops 
fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.640218e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.828831e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.828831e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.719787 sec -INFO: No Floating Point Exceptions have been reported - 4,732,734,719 cycles # 2.746 GHz - 11,163,471,114 instructions # 2.36 insn per cycle - 1.724312193 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4403) (512y: 246) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.072241e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.175446e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.175446e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.339175 sec -INFO: No Floating Point Exceptions have been reported - 4,159,319,454 cycles # 1.776 GHz - 6,222,343,045 instructions # 1.50 insn per cycle - 2.343565013 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1516) (512y: 139) (512z: 3679) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index b9aad18eeb..5245312e04 100644 --- 
a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-18_17:15:04 -DATE: 2024-09-18_12:14:49 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.682161e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.012912e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.052707e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.457449 sec -INFO: No Floating Point Exceptions have been reported - 1,934,954,114 cycles # 2.865 GHz - 2,736,882,841 instructions # 1.41 insn per cycle - 0.732650423 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 3.088422e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.576264e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.583915e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.415273e+04 +- 1.288237e+04 ) GeV^-2 +TOTAL : 0.370776 sec +INFO: No Floating Point Exceptions have been reported + 948,898,570 cycles:u # 2.522 GHz (74.93%) + 2,404,849 stalled-cycles-frontend:u # 0.25% frontend cycles idle (76.45%) + 11,570,294 stalled-cycles-backend:u # 1.22% backend cycles idle (74.24%) + 1,473,357,888 instructions:u # 1.55 insn per cycle + # 0.01 stalled cycles per insn (75.76%) + 0.511369879 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.683155e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.385425e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.426136e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 -TOTAL : 0.507459 sec -INFO: No Floating Point Exceptions have been reported - 2,120,407,613 cycles # 2.883 GHz - 3,024,448,335 instructions # 1.43 insn per cycle - 0.792985016 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.899042e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.025660e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.027519e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.619625e+05 +- 1.611328e+05 ) GeV^-2 +TOTAL : 0.520560 sec +INFO: No Floating Point Exceptions have been reported + 1,344,404,243 cycles:u # 2.601 GHz (74.57%) + 2,658,037 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.26%) + 4,825,135 stalled-cycles-backend:u # 0.36% backend cycles idle (74.44%) + 1,788,812,845 instructions:u # 1.33 insn per cycle + # 0.00 stalled cycles per insn (75.11%) + 0.579967163 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412607e+00 -Avg ME (F77/GPU) = 1.4132214305330990 -Relative difference = 0.0004349621183379836 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 1.412410e+00 +Avg ME (F77/GPU) = 1.4131674300257941 +Relative difference = 0.0005362678158567296 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.506015e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.518972e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.518972e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.556943 sec -INFO: No Floating Point Exceptions have been reported - 19,264,218,294 cycles # 2.937 GHz - 59,614,798,383 instructions # 3.09 insn per cycle - 6.560956742 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.674362e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.693480e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.693480e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 +TOTAL : 4.481835 sec +INFO: No Floating Point Exceptions have been reported + 15,527,623,702 cycles:u # 3.462 GHz (74.93%) + 1,358,327 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.03%) + 2,474,889,742 stalled-cycles-backend:u # 15.94% backend cycles idle (75.03%) + 56,588,613,692 instructions:u # 3.64 insn per cycle + # 0.04 stalled cycles per insn (75.03%) + 4.569830402 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1190) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129949096991936 -Relative difference = 6.390737857384068e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129859809517598 +Relative difference = 1.3480841507557613e-08 OK (relative difference <= 5E-3) ========================================================================= 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.070356e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.207853e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.207853e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 2.048077 sec -INFO: No Floating Point Exceptions have been reported - 6,023,874,049 cycles # 2.936 GHz - 17,061,893,848 instructions # 2.83 insn per cycle - 2.052246672 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.142338e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.160857e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.160857e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 +TOTAL : 1.456926 sec +INFO: No Floating Point Exceptions have been reported + 5,046,759,545 cycles:u # 3.454 GHz (74.96%) + 1,785,779 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.85%) + 1,556,603,635 stalled-cycles-backend:u # 30.84% backend cycles idle (74.82%) + 16,277,524,788 instructions:u # 3.23 insn per cycle + # 0.10 stalled cycles per insn (74.82%) + 1.557952037 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 
5124) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129954647353316 -Relative difference = 3.2890090308261873e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129857731430207 +Relative difference = 1.6055147002442227e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.743575e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.804848e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.804848e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.957876 sec -INFO: No Floating Point Exceptions have been reported - 2,640,887,772 cycles # 2.747 GHz - 6,187,336,173 instructions # 2.34 insn per cycle - 0.962119669 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.446494e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.531677e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.531677e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 +TOTAL : 0.692125 sec +INFO: No Floating Point Exceptions have been reported + 2,383,806,406 cycles:u # 3.426 GHz (74.81%) + 1,772,559 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.72%) + 741,384,713 stalled-cycles-backend:u # 31.10% backend cycles idle (74.72%) + 6,038,789,305 instructions:u # 2.53 insn per cycle + # 0.12 stalled cycles per insn (74.64%) + 0.790224849 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4734) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133162101620087 +Relative difference = 1.4870135814264702e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.915124e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.989470e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.989470e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.873685 sec -INFO: No Floating Point Exceptions have been reported - 2,402,820,009 cycles # 2.739 GHz - 5,790,162,566 instructions # 2.41 insn per cycle - 0.877828237 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4906) (512y: 37) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.453255e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.496895e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.496895e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.147590 sec -INFO: No Floating Point Exceptions have 
been reported - 2,076,037,431 cycles # 1.804 GHz - 3,391,394,333 instructions # 1.63 insn per cycle - 1.151886126 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 37) (512z: 3789) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 1d937591ab..065f27f41c 100644 --- 
a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,97 +1,77 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-18_18:06:58 -DATE: 2024-09-18_12:54:13 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.452792e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.504415e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.504415e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.473960 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,972,315,192 cycles # 2.868 GHz - 2,911,549,142 instructions # 1.48 insn per cycle - 0.746422585 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 2.218326e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.576933e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.576933e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.755508e+02 +- 2.671054e+02 ) GeV^-2 +TOTAL : 0.526714 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,521,602,665 cycles:u # 2.785 GHz (76.58%) + 7,096,429 stalled-cycles-frontend:u # 0.47% frontend cycles idle (76.26%) + 261,488,952 stalled-cycles-backend:u # 17.19% backend cycles idle (75.53%) + 2,022,238,545 instructions:u # 1.33 insn per cycle + # 0.13 stalled cycles per insn (73.87%) + 0.575439595 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.537799e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.260766e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.260766e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.737499e+02 +- 4.776369e+02 ) GeV^-2 -TOTAL : 0.654007 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,561,684,908 cycles # 2.881 GHz - 3,893,804,940 instructions # 1.52 insn per cycle - 0.947747663 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.625160e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.546838e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.546838e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.855939e+03 +- 1.791987e+03 ) GeV^-2 +TOTAL : 1.140152 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,390,394,734 cycles:u # 2.958 GHz (75.04%) + 16,797,318 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.69%) + 842,692,687 stalled-cycles-backend:u # 24.86% backend cycles idle (74.65%) + 3,619,427,436 instructions:u # 1.07 insn per cycle + # 0.23 stalled cycles per insn (74.63%) + 1.202730605 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -99,35 +79,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412607e+00 -Avg ME (F77/GPU) = 1.4132214305330990 -Relative difference = 0.0004349621183379836 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 1.412410e+00 +Avg ME (F77/GPU) = 1.4131674300257941 +Relative difference = 0.0005362678158567296 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.511557e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.524668e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.524668e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.546653 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 19,271,491,646 cycles # 2.942 GHz - 59,619,016,957 instructions # 3.09 insn per cycle - 6.550964309 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.657263e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.676339e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.676339e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 +TOTAL : 4.504604 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 15,586,193,933 cycles:u # 3.458 GHz (74.99%) + 2,459,790 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.98%) + 2,460,055,112 stalled-cycles-backend:u # 15.78% backend cycles idle (74.98%) + 56,619,213,385 instructions:u # 3.63 insn per cycle + # 0.04 stalled cycles per insn (74.98%) + 4.512308073 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1190) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -135,33 +116,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129949096991936 -Relative difference = 6.390737857384068e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129859809517598 +Relative difference = 1.3480841507557613e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.075407e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.222998e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.222998e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 2.052234 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,045,406,228 cycles # 2.940 GHz - 17,110,194,161 instructions # 2.83 insn per cycle - 2.056632379 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.133341e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.151657e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.151657e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 +TOTAL : 1.470448 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,086,506,407 cycles:u # 3.452 GHz (75.07%) + 1,843,913 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.03%) + 1,597,361,677 stalled-cycles-backend:u # 31.40% backend cycles idle (75.03%) + 
16,294,987,883 instructions:u # 3.20 insn per cycle + # 0.10 stalled cycles per insn (75.03%) + 1.478241212 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5124) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -169,33 +153,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129954647353316 -Relative difference = 3.2890090308261873e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129857731430207 +Relative difference = 
1.6055147002442227e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.741961e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.804731e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.804731e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.963728 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,665,943,840 cycles # 2.756 GHz - 6,224,556,067 instructions # 2.33 insn per cycle - 0.968076233 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.437539e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.521085e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.521085e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 +TOTAL : 0.696690 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,399,754,400 cycles:u # 3.430 GHz (74.86%) + 1,783,318 
stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.85%) + 734,442,519 stalled-cycles-backend:u # 30.60% backend cycles idle (74.85%) + 6,058,668,674 instructions:u # 2.52 insn per cycle + # 0.12 stalled cycles per insn (74.85%) + 0.704553472 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4734) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -203,80 +190,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133162101620087 +Relative difference = 1.4870135814264702e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.889422e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.966151e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.966151e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.890681 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,436,025,235 cycles # 2.723 GHz - 5,827,123,635 instructions # 2.39 insn per cycle - 0.895318545 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4906) (512y: 37) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.443092e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.487537e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.487537e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.160657 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,101,025,117 cycles # 1.805 GHz - 3,433,428,500 instructions # 1.63 insn per cycle - 1.165027687 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 37) (512z: 3789) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 4251937b55..eabcf1b0aa 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-18_17:15:16 -DATE: 2024-09-18_12:15:10 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.677136e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.031870e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.066369e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.462735 sec -INFO: No Floating Point Exceptions have been reported - 1,946,975,962 cycles # 2.877 GHz - 2,736,807,999 instructions # 1.41 insn per cycle - 0.735970844 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 226 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 3.173106e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.633608e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.641451e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.415273e+04 +- 1.288237e+04 ) GeV^-2 +TOTAL : 0.367062 sec +INFO: No Floating Point Exceptions have been reported + 952,943,414 cycles:u # 2.503 GHz (75.96%) + 2,360,452 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.84%) + 11,754,320 stalled-cycles-backend:u # 1.23% backend cycles idle (75.56%) + 1,498,730,840 instructions:u # 1.57 insn per cycle + # 0.01 stalled cycles per insn (76.07%) + 0.538153724 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.680283e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.366147e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.409371e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 -TOTAL : 0.508125 sec -INFO: No Floating Point Exceptions have been reported - 2,113,521,464 cycles # 2.866 GHz - 3,052,176,829 instructions # 1.44 insn per cycle - 0.794975331 seconds 
time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.857646e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.014449e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.016289e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.619625e+05 +- 1.611328e+05 ) GeV^-2 +TOTAL : 0.492332 sec +INFO: No Floating Point Exceptions have been reported + 1,311,451,654 cycles:u # 2.565 GHz (75.44%) + 2,488,966 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.80%) + 6,593,159 stalled-cycles-backend:u # 0.50% backend cycles idle (75.43%) + 1,831,245,848 instructions:u # 1.40 insn per cycle + # 0.00 stalled cycles per insn (74.99%) + 0.549050391 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412607e+00 -Avg ME (F77/GPU) = 1.4132214305330990 -Relative difference = 0.0004349621183379836 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 1.412410e+00 +Avg ME (F77/GPU) = 1.4131674300257941 +Relative difference = 0.0005362678158567296 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.498167e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.510888e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.510888e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.577268 sec -INFO: No Floating Point Exceptions have been reported - 19,407,580,643 cycles # 2.949 GHz - 59,354,263,399 instructions # 3.06 insn per cycle - 6.581442326 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.654108e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.672913e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.672913e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 +TOTAL : 4.506859 sec +INFO: No Floating Point Exceptions have been reported + 15,465,039,739 cycles:u # 3.429 GHz (75.00%) + 2,298,886 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) + 2,761,224,980 stalled-cycles-backend:u # 17.85% backend cycles idle (74.99%) + 56,323,382,399 instructions:u # 3.64 insn per cycle + # 0.05 stalled cycles per insn (74.99%) + 4.574466306 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1124) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: 
The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129949096991936 -Relative difference = 6.390737857384068e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129859511640177 +Relative difference = 3.456225494743424e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.398785e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.549497e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.549497e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.968439 sec -INFO: No Floating Point Exceptions have been reported - 5,775,824,576 cycles # 2.929 GHz - 16,849,685,670 instructions # 2.92 insn per cycle - 1.972573842 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5610) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.164495e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.184500e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.184500e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 +TOTAL : 1.429209 sec +INFO: No Floating Point Exceptions have been reported + 4,860,552,892 cycles:u # 3.392 GHz (75.24%) + 1,895,413 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.16%) + 1,475,677,075 stalled-cycles-backend:u # 30.36% backend cycles idle (74.95%) + 16,358,411,245 instructions:u # 3.37 insn per cycle + # 0.09 stalled cycles per insn (74.88%) + 1.541885001 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5045) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129954647353316 -Relative difference = 3.2890090308261873e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129858306637857 +Relative difference = 1.1984281117008586e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.527004e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.573961e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.573961e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.091009 sec -INFO: No Floating Point Exceptions have been reported - 3,021,095,483 cycles # 2.760 GHz - 6,848,870,145 instructions # 2.27 insn per cycle - 1.095189540 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5735) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.108598e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.172019e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.172019e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 +TOTAL : 0.799467 sec +INFO: No Floating Point Exceptions have been reported + 2,731,164,326 cycles:u # 3.399 GHz (75.00%) + 1,756,161 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.19%) + 823,276,493 stalled-cycles-backend:u # 30.14% backend cycles idle (75.11%) + 6,696,704,556 instructions:u # 2.45 insn per cycle + # 0.12 stalled cycles per insn (75.12%) + 0.894493118 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5386) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133162101620087 +Relative difference = 1.4870135814264702e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.611080e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.664155e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.664155e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.035237 sec -INFO: No Floating Point Exceptions have been reported - 2,858,508,214 cycles # 2.752 GHz - 6,438,110,737 instructions # 2.25 insn per cycle - 1.039480125 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5509) (512y: 23) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.329594e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.366106e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.366106e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.252535 sec -INFO: No Floating Point Exceptions have been reported - 2,255,457,879 cycles # 1.796 GHz - 3,755,585,205 instructions # 1.67 insn per cycle - 1.256791945 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 28) (512z: 4084) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 09551986c9..575c62cc1a 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-18_17:15:28 -DATE: 2024-09-18_12:15:31 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.594581e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.871606e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.970951e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.473359 sec -INFO: No Floating Point Exceptions have been reported - 1,992,272,995 cycles # 2.881 GHz - 2,873,441,271 instructions # 1.44 insn per cycle - 0.748198068 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.435119e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.539348e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.541237e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 +TOTAL : 0.463804 sec +INFO: No Floating Point Exceptions have been reported + 1,178,530,093 cycles:u # 2.645 GHz (75.92%) + 2,606,684 stalled-cycles-frontend:u # 0.22% frontend cycles idle (76.53%) + 5,150,807 stalled-cycles-backend:u # 0.44% backend cycles idle (76.07%) + 1,656,880,144 instructions:u # 1.41 insn per cycle + # 0.00 stalled cycles per insn (74.17%) + 0.595192337 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.037058e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.222746e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.233058e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.613274 sec -INFO: No Floating Point Exceptions have been reported - 2,464,972,790 cycles # 2.891 GHz - 3,748,511,486 instructions # 1.52 insn per cycle - 0.912099116 
seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.600834e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.726711e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.729242e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 +TOTAL : 0.725011 sec +INFO: No Floating Point Exceptions have been reported + 1,977,744,096 cycles:u # 2.638 GHz (75.57%) + 2,415,379 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.38%) + 6,005,818 stalled-cycles-backend:u # 0.30% backend cycles idle (75.23%) + 2,514,772,251 instructions:u # 1.27 insn per cycle + # 0.00 stalled cycles per insn (74.24%) + 0.791388927 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569487 -Relative difference = 4.418889885423659e-07 +Avg ME (F77/GPU) = 1.4131213755569483 +Relative difference = 4.4188898885662695e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.399672e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.411896e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.411896e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.848810 sec -INFO: No Floating Point Exceptions have been reported - 20,197,037,339 cycles # 2.948 GHz - 60,947,415,438 instructions # 3.02 insn per cycle - 6.853052511 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1220) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.365200e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.380510e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.380510e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 4.893360 sec +INFO: No Floating Point Exceptions have been reported + 17,103,424,613 cycles:u # 3.492 GHz (75.01%) + 2,386,149 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) + 3,979,466,539 stalled-cycles-backend:u # 23.27% backend cycles idle (75.01%) + 57,719,685,111 instructions:u # 3.37 insn per cycle + # 0.07 stalled cycles per insn (75.01%) + 4.961790583 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1219) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.642526e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.687090e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.687090e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.549259 sec -INFO: No Floating Point Exceptions have been reported - 10,477,481,501 cycles # 2.949 GHz - 30,820,930,825 instructions # 2.94 insn per cycle - 3.553666211 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5351) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.397129e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.453309e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.453309e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.585749 sec +INFO: No Floating Point Exceptions have been reported + 8,951,817,863 cycles:u # 3.457 GHz (74.98%) + 2,245,079 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.98%) + 2,261,108,188 stalled-cycles-backend:u # 25.26% backend cycles idle (74.98%) + 29,715,594,187 instructions:u # 3.32 insn per cycle + # 0.08 stalled cycles per insn (74.98%) + 2.706615239 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4755) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.196444e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.370621e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.370621e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.802004 sec -INFO: No Floating Point Exceptions have been reported - 4,965,652,288 cycles # 2.750 GHz - 11,359,248,854 instructions # 2.29 insn per cycle - 1.806342805 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4776) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.330087e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.354301e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.354301e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.256022 sec +INFO: No Floating Point Exceptions have been reported + 4,374,586,886 cycles:u # 3.473 GHz (74.72%) + 2,021,531 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.70%) + 1,211,058,173 stalled-cycles-backend:u # 27.68% backend cycles idle (75.02%) + 11,045,449,646 instructions:u # 2.52 insn per cycle + # 0.11 stalled cycles per insn (75.23%) + 1.312603944 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4405) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.041756e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.063436e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.063436e+05 ) sec^-1 
-MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.592695 sec -INFO: No Floating Point Exceptions have been reported - 4,382,366,442 cycles # 2.746 GHz - 10,608,797,295 instructions # 2.42 insn per cycle - 1.596978533 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4503) (512y: 84) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 
10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.957560e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.055998e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.055998e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.377375 sec -INFO: No Floating Point Exceptions have been reported - 4,237,877,454 cycles # 1.780 GHz - 6,168,521,326 instructions # 1.46 insn per cycle - 2.381770690 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2143) (512y: 116) (512z: 3653) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213786174055 -Relative difference = 4.3972324717191576e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index e31dab3bcb..77b409ec9c 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-09-18_17:15:42 -DATE: 2024-09-18_12:15:57 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.665772e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.933205e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.041936e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.474243 sec -INFO: No Floating Point Exceptions have been reported - 1,991,405,654 cycles # 2.879 GHz - 2,864,466,394 instructions # 1.44 insn per cycle - 0.749070557 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.444533e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.547416e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.549334e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 +TOTAL : 0.436104 sec +INFO: No Floating Point Exceptions have been reported + 1,205,766,715 cycles:u # 2.767 GHz (75.51%) + 2,657,595 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.40%) + 5,124,684 stalled-cycles-backend:u # 0.43% backend cycles idle (76.16%) + 1,646,974,323 instructions:u # 1.37 insn per cycle + # 0.00 stalled cycles per insn (76.43%) + 0.600457511 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.042210e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.228789e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.239192e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.610923 sec -INFO: No Floating Point Exceptions have been reported - 2,452,672,007 cycles # 2.882 GHz - 3,739,836,978 instructions # 1.52 insn per cycle - 0.910769372 
seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.623679e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.750011e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.752595e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 +TOTAL : 0.708523 sec +INFO: No Floating Point Exceptions have been reported + 1,983,766,237 cycles:u # 2.710 GHz (75.43%) + 2,538,452 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.45%) + 7,166,285 stalled-cycles-backend:u # 0.36% backend cycles idle (75.98%) + 2,423,186,703 instructions:u # 1.22 insn per cycle + # 0.00 stalled cycles per insn (75.47%) + 0.771600202 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569487 -Relative difference = 4.418889885423659e-07 +Avg ME (F77/GPU) = 1.4131213755569483 +Relative difference = 4.4188898885662695e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.379932e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.391571e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.391571e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.905036 sec -INFO: No Floating Point Exceptions have been reported - 20,270,175,803 cycles # 2.935 GHz - 61,175,514,110 instructions # 3.02 insn per cycle - 6.909213331 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1272) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.400236e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.416076e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.416076e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 4.843297 sec +INFO: No Floating Point Exceptions have been reported + 16,613,709,794 cycles:u # 3.428 GHz (74.93%) + 2,837,093 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.98%) + 3,460,595,569 stalled-cycles-backend:u # 20.83% backend cycles idle (75.05%) + 57,390,447,744 instructions:u # 3.45 insn per cycle + # 0.06 stalled cycles per insn (75.08%) + 4.926799024 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 866) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.712701e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.757964e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.757964e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.496353 sec -INFO: No Floating Point Exceptions have been reported - 10,330,450,764 cycles # 2.952 GHz - 30,532,965,755 instructions # 2.96 insn per cycle - 3.500721812 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5155) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.398577e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.454629e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.454629e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.585324 sec +INFO: No Floating Point Exceptions have been reported + 8,774,135,894 cycles:u # 3.389 GHz (75.14%) + 1,487,872 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.03%) + 1,834,490,066 stalled-cycles-backend:u # 20.91% backend cycles idle (74.97%) + 30,092,791,523 instructions:u # 3.43 insn per cycle + # 0.06 stalled cycles per insn (74.97%) + 2.675244899 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4834) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.873461e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.031366e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.031366e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.866310 sec -INFO: No Floating Point Exceptions have been reported - 5,149,448,063 cycles # 2.754 GHz - 11,872,714,422 instructions # 2.31 insn per cycle - 1.870704205 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4887) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.213312e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.234495e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.234495e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.375018 sec +INFO: No Floating Point Exceptions have been reported + 4,652,993,936 cycles:u # 3.375 GHz (75.05%) + 2,302,267 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.05%) + 1,496,265,905 stalled-cycles-backend:u # 32.16% backend cycles idle (75.05%) + 11,669,982,634 instructions:u # 2.51 insn per cycle + # 0.13 stalled cycles per insn (75.05%) + 1.444304956 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4625) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.721277e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.910902e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.910902e+04 ) sec^-1 
-MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.705742 sec -INFO: No Floating Point Exceptions have been reported - 4,682,307,882 cycles # 2.740 GHz - 11,166,992,215 instructions # 2.38 insn per cycle - 1.710031590 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4508) (512y: 239) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 
256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.916313e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.015099e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.015099e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.391177 sec -INFO: No Floating Point Exceptions have been reported - 4,255,173,095 cycles # 1.777 GHz - 6,409,630,981 instructions # 1.51 insn per cycle - 2.395610797 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2039) (512y: 162) (512z: 3731) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213786174055 -Relative difference = 4.3972324717191576e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index e60a3b56f2..59b911f1d8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_17:15:56 -DATE: 2024-09-18_12:16:23 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.313288e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.338946e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.340893e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.535030 sec -INFO: No Floating Point Exceptions have been reported - 2,203,915,832 cycles # 2.863 GHz - 3,411,363,725 instructions # 1.55 insn per cycle - 0.826937803 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.204596e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.259417e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.259568e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 +TOTAL : 0.715128 sec +INFO: No Floating Point Exceptions have been reported + 1,635,437,665 cycles:u # 2.817 GHz (74.94%) + 2,495,684 stalled-cycles-frontend:u # 0.15% frontend cycles idle (76.17%) + 7,104,110 stalled-cycles-backend:u # 0.43% backend cycles idle (76.53%) + 2,096,604,985 instructions:u # 1.28 insn per cycle + # 0.00 stalled cycles per insn (74.05%) + 0.853269091 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.139082e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.168902e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.170140e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 
-TOTAL : 3.048810 sec -INFO: No Floating Point Exceptions have been reported - 9,673,114,822 cycles # 2.925 GHz - 22,022,328,349 instructions # 2.28 insn per cycle - 3.363974995 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.693369e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.698924e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.699024e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.646807 sec +INFO: No Floating Point Exceptions have been reported + 22,366,056,530 cycles:u # 3.352 GHz (75.06%) + 3,203,682 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.10%) + 7,189,313 stalled-cycles-backend:u # 0.03% backend cycles idle (74.99%) + 19,993,459,196 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (74.98%) + 6.710008304 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.884766e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.885678e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.885678e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.707888 sec -INFO: No Floating Point Exceptions have been reported - 25,646,480,577 cycles # 2.944 GHz - 78,959,199,970 instructions # 3.08 insn per cycle - 8.712344144 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.649906e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.651120e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.651120e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.205871 sec +INFO: No Floating Point Exceptions have been reported + 21,471,105,477 cycles:u # 3.462 GHz (75.04%) + 1,097,352 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) + 3,029,846,667 stalled-cycles-backend:u # 14.11% backend cycles idle (74.98%) + 78,182,978,747 instructions:u # 3.64 insn per cycle + # 0.04 stalled cycles per insn (74.99%) + 6.331276745 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4744) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe 
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.525938e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.529103e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.529103e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.657640 sec -INFO: No Floating Point Exceptions have been reported - 13,102,337,051 cycles # 2.811 GHz - 39,559,050,978 instructions # 3.02 insn per cycle - 4.662071177 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.442187e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.447228e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.447228e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.031168 sec +INFO: No Floating Point Exceptions have been reported + 10,507,284,560 cycles:u # 3.471 GHz (74.91%) + 411,446 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.91%) + 1,338,802,016 stalled-cycles-backend:u # 12.74% backend cycles idle (74.94%) + 39,389,673,104 instructions:u # 3.75 insn per cycle + # 0.03 stalled cycles per insn (75.05%) + 3.139609397 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.037518e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.054750e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.054750e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.047044 sec -INFO: No Floating Point Exceptions have been reported - 5,613,016,028 cycles # 2.737 GHz - 13,823,575,120 instructions # 2.46 insn per cycle - 2.051472192 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.218352e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.220871e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.220871e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.362754 sec +INFO: No Floating Point Exceptions have been reported + 4,685,851,873 cycles:u # 3.447 GHz (74.87%) + 2,379,754 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.73%) + 424,035,996 stalled-cycles-backend:u # 9.05% backend cycles idle (74.81%) + 13,809,013,925 instructions:u # 2.95 insn per cycle + # 0.03 stalled cycles per insn (75.11%) + 1.473377245 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.172996e+03 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.194283e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.194283e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.794478 sec -INFO: No Floating Point Exceptions have been reported - 4,922,583,154 cycles # 2.738 GHz - 12,506,595,932 instructions # 2.54 insn per cycle - 1.798855063 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.987584e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.999990e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.999990e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.353771 sec -INFO: No Floating Point Exceptions have been reported - 4,138,447,690 cycles # 1.756 GHz - 6,393,230,519 instructions # 1.54 insn per cycle - 2.358130141 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 40b573a43c..f24b7b8ec2 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,97 +1,77 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_18:07:17 -DATE: 2024-09-18_12:55:00 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.976623e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.275789e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.275789e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.524913 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,187,535,830 cycles # 2.870 GHz - 3,393,578,749 instructions # 1.55 insn per cycle - 0.821118226 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.220153e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.258909e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.258909e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.567109 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,653,191,234 cycles:u # 2.832 GHz (75.46%) + 3,284,095 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.64%) + 49,343,637 stalled-cycles-backend:u # 2.98% backend cycles idle (75.12%) + 2,124,460,499 instructions:u # 1.29 insn per cycle + # 0.02 stalled cycles per insn (74.14%) + 0.620909696 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.647884e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.131075e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.131075e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.300824 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,456,540,936 cycles # 2.924 GHz - 23,609,445,897 instructions # 2.26 insn per cycle - 3.631983066 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.633929e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.694266e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.694266e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 +TOTAL : 7.474935 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 25,259,898,764 cycles:u # 3.364 GHz (74.99%) + 38,628,706 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.05%) + 1,093,566,156 stalled-cycles-backend:u # 4.33% backend cycles idle (75.04%) + 22,393,412,318 instructions:u # 0.89 insn per cycle + # 0.05 stalled cycles per insn (75.02%) + 7.543274639 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -99,35 +79,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.884487e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.885391e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.885391e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.714099 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 25,667,183,320 cycles # 2.944 GHz - 78,962,641,614 instructions # 3.08 insn per cycle - 8.718743024 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.654658e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.655890e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.655890e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.189524 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 21,480,768,891 cycles:u # 3.469 GHz (74.94%) + 998,981 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.94%) + 2,950,116,993 stalled-cycles-backend:u # 13.73% backend cycles idle (74.95%) + 78,052,279,312 instructions:u # 3.63 insn per cycle + # 0.04 stalled cycles per insn (75.02%) + 6.198416917 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4744) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -135,33 +116,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge 
OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.526771e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.530011e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.530011e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.660484 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 13,111,732,509 cycles # 2.811 GHz - 39,572,349,146 instructions # 3.02 insn per cycle - 4.665178116 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.336132e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.341069e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.341069e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.085441 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,494,758,685 cycles:u # 3.398 GHz (74.88%) + 437,035 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.90%) + 1,336,910,000 stalled-cycles-backend:u # 12.74% backend cycles idle (75.01%) + 39,380,034,552 instructions:u # 3.75 insn per cycle + # 0.03 stalled cycles per insn (75.12%) + 3.093820955 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -169,33 +153,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.088722e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.106050e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.106050e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.038539 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,622,073,957 cycles # 2.753 GHz - 13,834,285,866 instructions # 2.46 insn per cycle - 2.043264664 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.179996e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.182496e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.182496e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.401929 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,719,141,237 cycles:u # 3.358 GHz (74.96%) + 362,701 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.96%) + 548,949,913 stalled-cycles-backend:u # 11.63% backend cycles idle (74.96%) + 
13,799,660,671 instructions:u # 2.92 insn per cycle + # 0.04 stalled cycles per insn (74.96%) + 1.432266244 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -203,80 +190,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 
OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.180474e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.202665e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.202665e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.797187 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,937,816,969 cycles # 2.742 GHz - 12,516,988,109 instructions # 2.53 insn per cycle - 1.801993078 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.979563e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.992901e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.992901e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.360775 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,161,078,836 cycles # 1.760 GHz - 6,405,054,232 instructions # 1.54 insn per cycle - 2.365459011 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 14d3e456fd..d96a0f8e8f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_18:12:07 -DATE: 2024-09-18_13:06:00 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.295730e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.322229e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.324249e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.200527e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.259546e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.259705e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.520677 sec -INFO: No Floating Point 
Exceptions have been reported - 2,148,428,756 cycles # 2.846 GHz - 3,383,382,873 instructions # 1.57 insn per cycle - 0.814124974 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +TOTAL : 0.562989 sec +INFO: No Floating Point Exceptions have been reported + 1,658,227,220 cycles:u # 2.870 GHz (75.30%) + 3,326,352 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.44%) + 46,729,112 stalled-cycles-backend:u # 2.82% backend cycles idle (74.98%) + 2,064,261,741 instructions:u # 1.24 insn per cycle + # 0.02 stalled cycles per insn (74.86%) + 0.612342330 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.133518e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.163906e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.165159e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.693965e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.700029e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.700131e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.142841 sec -INFO: No Floating Point Exceptions have been reported - 9,913,272,493 cycles # 2.915 GHz - 21,406,834,972 instructions # 2.16 insn per cycle - 3.457413936 seconds time elapsed +TOTAL : 7.331623 sec +INFO: No Floating Point Exceptions have been reported + 24,894,559,524 cycles:u # 3.383 GHz (74.97%) + 28,356,340 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.04%) + 1,090,496,615 stalled-cycles-backend:u # 4.38% backend cycles idle (75.06%) + 21,642,040,039 instructions:u # 0.87 insn per cycle + # 0.05 stalled cycles per insn (74.98%) + 7.389428416 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.883085e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.883982e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.883982e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.624062e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.625295e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.625295e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.717475 sec -INFO: No Floating Point Exceptions have been reported - 25,650,608,073 cycles # 2.942 GHz - 78,955,783,568 instructions # 3.08 insn per cycle - 8.721774217 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.258293 sec +INFO: No Floating Point Exceptions have been reported + 21,742,847,303 cycles:u # 3.473 GHz (74.95%) + 1,362,438 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.95%) + 3,135,647,236 stalled-cycles-backend:u # 14.42% backend cycles idle (74.96%) + 78,097,122,846 instructions:u # 3.59 insn per cycle + # 0.04 stalled cycles per insn (75.00%) + 6.262435029 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4744) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.525568e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.528763e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.528763e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.436883e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.442297e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.442297e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.659766 sec -INFO: No Floating Point Exceptions have been reported - 13,093,152,498 cycles # 2.808 GHz - 39,558,598,891 instructions # 3.02 insn per cycle - 4.664046020 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.025203 sec +INFO: No Floating Point Exceptions have been reported + 10,491,611,008 cycles:u # 3.466 GHz (74.90%) + 420,708 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.90%) + 1,336,455,062 stalled-cycles-backend:u # 12.74% backend cycles idle (74.90%) + 39,419,328,691 instructions:u # 3.76 insn per cycle + # 0.03 stalled cycles per insn (74.92%) + 3.029267840 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = 
DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.044580e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.060991e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.060991e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.213233e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.215919e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.215919e+04 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.046898 sec -INFO: No Floating Point Exceptions have been reported - 5,615,094,940 cycles # 2.739 GHz - 13,822,846,005 instructions # 2.46 insn per cycle - 2.051140101 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) +TOTAL : 1.360185 sec +INFO: No Floating Point Exceptions have been reported + 4,710,598,250 cycles:u # 3.458 GHz (74.76%) + 311,599 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.75%) + 547,714,218 stalled-cycles-backend:u # 11.63% backend cycles idle (74.66%) + 13,803,559,147 instructions:u # 2.93 insn per cycle + # 0.04 stalled cycles per insn (74.96%) + 1.364167546 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 
9.166765e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.187474e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.187474e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.797318 sec -INFO: No Floating Point Exceptions have been reported - 4,920,345,742 cycles # 2.732 GHz - 12,503,437,535 instructions # 2.54 insn per cycle - 1.801597465 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.991343e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.003364e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.003364e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.354011 sec -INFO: No Floating Point Exceptions have been reported - 4,141,327,319 cycles # 1.757 GHz - 6,390,315,143 instructions # 1.54 insn per cycle - 2.358468154 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index bbefe2a8e4..13c360ffa0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,86 +1,69 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_18:10:18 -DATE: 2024-09-18_13:00:21 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.061167e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.349241e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.351100e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.520345 sec -INFO: No Floating Point Exceptions have been reported - 2,166,689,738 cycles # 2.876 GHz - 3,445,065,863 instructions # 1.59 insn per cycle - 0.812996601 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.223746e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.259235e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.259386e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.560758 sec +INFO: No Floating Point Exceptions have been reported + 1,658,584,653 cycles:u # 2.860 GHz (75.27%) + 3,387,727 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.88%) + 47,445,269 stalled-cycles-backend:u # 2.86% backend cycles idle (75.08%) + 2,123,150,894 instructions:u # 1.28 insn per cycle + # 0.02 stalled cycles per insn (74.34%) + 0.604288531 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.727515e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.166918e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.168161e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.213299 sec -INFO: No Floating Point Exceptions have been reported - 10,170,057,078 cycles # 2.920 GHz - 23,084,015,508 instructions # 2.27 insn per cycle - 3.538645884 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.643561e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.699482e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.699583e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 +TOTAL : 7.437252 sec +INFO: No Floating Point Exceptions have been reported + 25,116,954,361 cycles:u # 3.371 GHz (74.99%) + 38,717,522 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.03%) + 1,093,087,629 stalled-cycles-backend:u # 4.35% backend cycles idle (75.05%) + 22,307,973,674 instructions:u # 0.89 insn per cycle + # 0.05 stalled cycles per insn (74.94%) + 7.495432364 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -88,33 +71,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.884169e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.885066e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.885066e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.710738 sec -INFO: No Floating Point Exceptions have been reported - 25,636,302,572 cycles # 2.942 GHz - 78,955,597,829 instructions # 3.08 insn per cycle - 8.714991120 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.662302e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.663534e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.663534e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.168443 sec +INFO: No Floating Point Exceptions have been reported + 21,434,527,823 cycles:u # 3.474 GHz (74.98%) + 808,257 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.98%) + 2,951,820,906 stalled-cycles-backend:u # 13.77% backend cycles idle (74.98%) + 78,020,642,412 instructions:u # 3.64 insn per cycle + # 0.04 stalled cycles per insn (74.98%) + 6.172663607 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4744) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -122,31 +106,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.516471e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.519747e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.519747e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.670214 sec -INFO: No Floating Point Exceptions have been reported - 13,077,998,657 cycles # 2.798 GHz - 39,560,581,640 instructions # 3.02 insn per cycle - 4.674479243 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.443660e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.449374e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.449374e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.020825 sec +INFO: No Floating Point Exceptions have been reported + 10,478,664,066 cycles:u # 3.467 GHz (74.86%) + 410,699 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.86%) + 1,332,399,511 stalled-cycles-backend:u # 12.72% backend cycles idle (74.91%) + 39,427,721,367 instructions:u # 3.76 insn per cycle + # 0.03 stalled cycles per insn (75.04%) + 3.024874706 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -154,31 +141,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.487681e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.501903e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.501903e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.196522 sec -INFO: No Floating Point Exceptions have been reported - 6,031,690,352 cycles # 2.742 GHz - 13,823,991,565 instructions # 2.29 insn per cycle - 2.200855114 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.216044e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.218547e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.218547e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.357082 sec +INFO: No Floating Point Exceptions have been reported + 4,705,478,462 cycles:u # 3.462 GHz (74.70%) + 312,720 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.70%) + 548,334,891 stalled-cycles-backend:u # 11.65% backend cycles idle (74.85%) + 13,827,779,889 instructions:u # 2.94 insn per cycle + # 0.04 stalled cycles per insn (75.14%) + 1.361232418 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -186,76 +176,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.160431e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.182355e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.182355e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.796714 sec -INFO: No Floating Point Exceptions have been reported - 4,915,575,489 cycles # 2.731 GHz - 12,505,831,482 instructions # 2.54 insn per cycle - 1.801025403 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.864510e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.876687e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.876687e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.395867 sec -INFO: No 
Floating Point Exceptions have been reported - 4,162,633,573 cycles # 1.735 GHz - 6,392,322,352 instructions # 1.54 insn per cycle - 2.400290914 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 724af1477d..fa3d209f08 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_17:16:21 -DATE: 2024-09-18_12:16:57 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.313099e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.338786e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.340713e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.536981 sec -INFO: No Floating Point Exceptions have been reported - 2,207,441,775 cycles # 2.862 GHz - 3,435,949,472 instructions # 1.56 insn per cycle - 0.828882621 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.220747e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.276283e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.276432e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 +TOTAL : 0.633489 sec +INFO: No Floating Point Exceptions have been reported + 1,598,196,734 cycles:u # 2.869 GHz (76.05%) + 2,588,604 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.88%) + 6,239,197 stalled-cycles-backend:u # 0.39% backend cycles idle (75.30%) + 2,001,756,680 instructions:u # 1.25 insn per cycle + # 0.00 stalled cycles per insn (75.71%) + 0.793732399 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.143532e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.173218e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.174465e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.036857 sec -INFO: No Floating Point Exceptions have been reported - 9,590,647,679 cycles # 2.910 GHz - 22,042,753,111 instructions # 2.30 insn per cycle - 3.351786655 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.692168e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.697598e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.697698e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.487899 sec +INFO: No Floating Point Exceptions have been reported + 21,985,727,628 cycles:u # 3.384 GHz (75.00%) + 3,266,647 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.02%) + 7,047,220 stalled-cycles-backend:u # 0.03% backend cycles idle (75.06%) + 19,652,848,907 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (74.94%) + 6.554364423 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.884971e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.885850e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.885850e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.706995 sec -INFO: No Floating Point Exceptions have been reported - 25,617,517,247 cycles # 2.941 GHz - 78,701,000,615 instructions # 3.07 insn per cycle - 8.711338338 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4191) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.643473e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.644776e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.644776e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.220674 sec +INFO: No Floating Point Exceptions have been reported + 21,581,680,162 cycles:u # 3.472 GHz (74.93%) + 1,008,607 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.92%) + 2,799,185,193 stalled-cycles-backend:u # 12.97% backend cycles idle (75.01%) + 78,073,639,400 instructions:u # 3.62 insn per cycle + # 0.04 stalled cycles per insn (75.04%) + 
6.303141919 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4695) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.566075e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.569356e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.569356e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.605078 sec -INFO: No Floating Point Exceptions have been reported - 13,036,001,618 cycles # 2.829 GHz - 39,449,493,817 instructions # 3.03 insn per cycle - 4.609408106 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12966) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.420836e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.426085e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.426085e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.042422 sec +INFO: No Floating Point Exceptions have been reported + 10,517,293,190 cycles:u # 3.462 GHz (75.02%) + 809,943 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) + 1,446,124,865 stalled-cycles-backend:u # 13.75% backend cycles idle (75.01%) + 39,401,088,959 instructions:u # 3.75 insn per cycle + # 0.04 stalled cycles per insn (75.00%) + 3.114756938 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:11940) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.966836e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.982546e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.982546e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.064889 sec -INFO: No Floating Point Exceptions have been reported - 5,676,808,859 cycles # 2.745 GHz - 13,911,294,100 instructions # 2.45 insn per cycle - 2.069253381 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11582) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.229527e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.232471e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.232471e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.359120 sec +INFO: No Floating Point Exceptions have been reported + 4,645,902,790 cycles:u # 3.449 GHz (74.98%) + 572,318 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.06%) + 429,391,951 stalled-cycles-backend:u # 9.24% backend cycles idle (75.09%) + 13,818,574,096 instructions:u # 2.97 insn per cycle + # 0.03 stalled cycles per insn (75.09%) + 1.452466793 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10220) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.081065e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.102389e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.102389e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.812330 sec -INFO: No Floating Point Exceptions have been reported - 4,986,765,093 cycles # 2.746 GHz - 12,602,417,777 instructions # 2.53 insn per cycle - 1.816710814 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10423) (512y: 241) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.944688e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.956851e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.956851e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.368181 sec -INFO: No Floating 
Point Exceptions have been reported - 4,157,079,693 cycles # 1.753 GHz - 6,500,343,598 instructions # 1.56 insn per cycle - 2.372472342 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1754) (512y: 193) (512z: 9382) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 9c62ee596f..963c30ad93 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_17:57:30 -DATE: 2024-09-18_12:45:20 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = 
HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.107911e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.129674e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.131105e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.542394 sec -INFO: No Floating Point Exceptions have been reported - 2,239,231,771 cycles # 2.882 GHz - 3,498,325,403 instructions # 1.56 insn per cycle - 0.833980513 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.209030e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.267353e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.267500e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 +TOTAL : 0.561504 sec +INFO: No Floating Point Exceptions have been reported + 1,650,301,145 cycles:u # 2.885 GHz (75.14%) + 2,510,405 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.07%) + 5,346,111 stalled-cycles-backend:u # 0.32% backend cycles idle (75.69%) + 2,016,627,541 instructions:u # 1.22 insn per cycle + # 0.00 stalled cycles per insn (75.39%) + 0.613700386 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.758554e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.783710e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.784685e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.309606 sec -INFO: No Floating Point Exceptions have been reported - 10,428,358,714 cycles # 2.922 GHz - 23,876,781,455 instructions # 2.29 insn per cycle - 3.623869439 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.692966e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.698428e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.698530e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.642295 sec +INFO: No Floating Point Exceptions have been reported + 22,284,819,763 cycles:u # 3.342 GHz (75.02%) + 3,323,965 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.08%) + 5,858,249 stalled-cycles-backend:u # 0.03% backend cycles idle (75.06%) + 19,822,694,807 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (74.98%) + 6.709793750 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158122E-004 -Relative difference = 2.837296513854949e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.278929e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.279396e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.279396e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 38.335228 sec -INFO: No Floating Point Exceptions have been reported - 112,569,296,340 cycles # 2.936 GHz - 144,793,904,773 instructions # 1.29 insn per cycle - 38.339626690 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21273) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.517393e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.517775e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.517775e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 36.315090 sec +INFO: No Floating Point Exceptions have been reported + 125,097,558,299 cycles:u # 3.445 GHz (75.01%) + 66,311,042 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.00%) + 9,731,272,148 stalled-cycles-backend:u # 7.78% backend cycles idle (75.00%) + 141,068,944,658 instructions:u # 1.13 insn per cycle + # 0.07 stalled cycles per insn (75.00%) + 
36.322945141 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21379) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140461E-004 Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.146613e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.149188e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.149188e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.218296 sec -INFO: No Floating Point Exceptions have been reported - 14,745,365,482 cycles # 2.824 GHz - 37,604,718,701 instructions # 2.55 insn per cycle - 5.222619147 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68172) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.494754e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.496840e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.496840e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 4.701163 sec +INFO: No Floating Point Exceptions have been reported + 16,292,346,635 cycles:u # 3.464 GHz (75.00%) + 854,652 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.00%) + 7,155,618,960 stalled-cycles-backend:u # 43.92% backend cycles idle (75.00%) + 37,489,495,775 instructions:u # 2.30 insn per cycle + # 0.19 stalled cycles per insn (75.00%) + 4.708988517 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68150) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141209E-004 -Relative difference = 2.8372990661989057e-07 +Avg ME (F77/C++) = 6.6266731198141220E-004 +Relative difference = 2.837299064562788e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.373915e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.387237e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.387237e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.230777 sec -INFO: No Floating Point Exceptions have been reported - 6,114,551,945 cycles # 2.737 GHz - 13,052,964,850 instructions # 2.13 insn per cycle - 2.235150749 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46946) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.005875e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.014158e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.014158e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.349048 sec +INFO: No Floating Point Exceptions have been reported + 8,128,107,621 cycles:u # 3.456 GHz (74.86%) + 180,109,446 stalled-cycles-frontend:u # 2.22% frontend cycles idle (74.83%) + 4,282,191,528 stalled-cycles-backend:u # 52.68% backend cycles idle (74.86%) + 12,905,149,783 instructions:u # 1.59 insn per cycle + # 0.33 stalled cycles per insn (75.01%) + 2.396249654 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2:46482) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 +Avg ME (F77/C++) = 6.6266731198156778E-004 +Relative difference = 2.837296716733571e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.869797e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.889489e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.889489e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.855437 sec -INFO: No Floating Point Exceptions have been reported - 5,079,069,827 cycles # 2.732 GHz - 11,450,297,808 instructions # 2.25 insn per cycle - 1.859852844 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40486) (512y: 285) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.334322e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.348410e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.348410e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.242689 sec -INFO: No Floating 
Point Exceptions have been reported - 3,955,754,497 cycles # 1.761 GHz - 5,927,045,148 instructions # 1.50 insn per cycle - 2.247181135 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2444) (512y: 337) (512z:39338) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index af0b172ab7..f8b4184335 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_17:58:30 -DATE: 2024-09-18_12:46:29 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = 
HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.101802e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.121265e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.122962e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.540843 sec -INFO: No Floating Point Exceptions have been reported - 2,223,817,024 cycles # 2.871 GHz - 3,385,583,234 instructions # 1.52 insn per cycle - 0.831231377 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.222842e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.277911e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.278067e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 +TOTAL : 0.541565 sec +INFO: No Floating Point Exceptions have been reported + 1,576,107,912 cycles:u # 2.847 GHz (74.89%) + 2,546,010 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.23%) + 5,315,341 stalled-cycles-backend:u # 0.34% backend cycles idle (75.01%) + 2,015,562,862 instructions:u # 1.28 insn per cycle + # 0.00 stalled cycles per insn (74.53%) + 0.594897242 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.740756e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.765684e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.766660e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.304935 sec -INFO: No Floating Point Exceptions have been reported - 10,396,192,831 cycles # 2.917 GHz - 23,795,713,123 instructions # 2.29 insn per cycle - 3.619511438 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.691657e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.697063e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.697163e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.462346 sec +INFO: No Floating Point Exceptions have been reported + 21,988,430,194 cycles:u # 3.388 GHz (74.92%) + 3,160,782 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) + 7,539,204 stalled-cycles-backend:u # 0.03% backend cycles idle (75.10%) + 19,611,940,250 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (75.11%) + 6.531548390 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158122E-004 -Relative difference = 2.837296513854949e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.220488e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.220945e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.220945e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 38.865543 sec -INFO: No Floating Point Exceptions have been reported - 114,075,746,984 cycles # 2.935 GHz - 144,284,837,728 instructions # 1.26 insn per cycle - 38.869913276 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21024) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.598266e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.598631e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.598631e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 35.673351 sec +INFO: No Floating Point Exceptions have been reported + 123,630,854,288 cycles:u # 3.466 GHz (75.00%) + 18,173,436 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.00%) + 11,350,077,369 stalled-cycles-backend:u # 9.18% backend cycles idle (75.00%) + 141,030,575,887 instructions:u # 1.14 insn per cycle + # 0.08 stalled cycles per insn (75.00%) + 
35.681240408 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21174) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198140450E-004 -Relative difference = 2.83729918072716e-07 +Avg ME (F77/C++) = 6.6266731198140482E-004 +Relative difference = 2.8372991758188064e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.002635e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.004951e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.004951e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.467988 sec -INFO: No Floating Point Exceptions have been reported - 15,296,909,197 cycles # 2.796 GHz - 37,837,176,497 instructions # 2.47 insn per cycle - 5.472337784 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68594) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.350552e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.352439e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.352439e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 4.902764 sec +INFO: No Floating Point Exceptions have been reported + 16,620,667,301 cycles:u # 3.388 GHz (74.95%) + 14,696,809 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.90%) + 6,330,044,323 stalled-cycles-backend:u # 38.09% backend cycles idle (74.95%) + 37,574,901,227 instructions:u # 2.26 insn per cycle + # 0.17 stalled cycles per insn 
(75.03%) + 4.910500106 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68049) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141209E-004 -Relative difference = 2.8372990661989057e-07 +Avg ME (F77/C++) = 6.6266731198141220E-004 +Relative difference = 2.837299064562788e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.512966e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.527080e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.527080e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.189426 sec -INFO: No Floating Point Exceptions have been reported - 6,002,714,707 cycles # 2.737 GHz - 12,921,820,063 instructions # 2.15 insn per cycle - 2.193921042 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46048) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.656480e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.666413e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.666413e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.150332 sec +INFO: No Floating Point Exceptions have been reported + 7,419,558,099 cycles:u # 3.446 GHz (74.80%) + 348,936 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.99%) + 4,274,680,056 stalled-cycles-backend:u # 57.61% backend cycles idle (75.11%) + 12,764,120,988 instructions:u # 1.72 insn per cycle + # 0.33 stalled cycles per insn (75.11%) + 
2.158658646 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:45597) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 +Avg ME (F77/C++) = 6.6266731198156778E-004 +Relative difference = 2.837296716733571e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.859047e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.878725e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.878725e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.857736 sec -INFO: No Floating Point Exceptions have been reported - 5,096,589,479 cycles # 2.738 GHz - 11,450,886,914 instructions # 2.25 insn per cycle - 1.862161811 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40151) (512y: 219) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.316370e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.329769e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.329769e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.247849 sec -INFO: No Floating 
Point Exceptions have been reported - 3,953,949,727 cycles # 1.756 GHz - 5,894,038,279 instructions # 1.49 insn per cycle - 2.252346875 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1959) (512y: 259) (512z:38977) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 90e270bc8d..35cdb26a8c 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_17:16:45 -DATE: 2024-09-18_12:17:32 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.485010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.524901e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.528717e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.493784 sec -INFO: No Floating Point Exceptions have been reported - 2,047,150,406 cycles # 2.872 GHz - 3,017,206,545 instructions # 1.47 insn per cycle - 0.769521849 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.892908e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.027098e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027391e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 +TOTAL : 0.491369 sec +INFO: No Floating Point Exceptions have been reported + 1,374,101,883 cycles:u # 2.727 GHz (76.14%) + 2,546,959 stalled-cycles-frontend:u # 0.19% frontend cycles idle (76.25%) + 6,128,063 stalled-cycles-backend:u # 0.45% backend cycles idle (75.26%) + 1,919,788,005 instructions:u # 1.40 insn per cycle + # 0.00 stalled cycles per insn (75.43%) + 0.680011428 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.130872e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.191030e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.193752e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.801872 sec -INFO: No Floating Point Exceptions have been reported - 5,918,779,581 cycles # 2.909 GHz - 12,693,441,452 instructions # 2.14 insn per cycle - 2.093566853 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.067138e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.080898e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.081071e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 +TOTAL : 3.970325 sec +INFO: No Floating Point Exceptions have been reported + 13,404,801,231 cycles:u # 3.359 GHz (74.86%) + 2,944,990 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.15%) + 5,995,644 stalled-cycles-backend:u # 0.04% backend cycles idle (75.17%) + 12,191,388,620 instructions:u # 0.91 insn per cycle + # 0.00 stalled cycles per insn (75.08%) + 4.031650894 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025600481842E-004 +Relative difference = 4.022433151864302e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.942960e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.943920e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.943920e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.446218 sec -INFO: No Floating Point Exceptions have been reported - 24,891,970,806 cycles # 2.946 GHz - 79,110,184,615 instructions # 3.18 insn per cycle - 8.450517031 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.705369e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.706685e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.706685e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.068861 sec +INFO: No Floating Point Exceptions have been reported + 21,067,114,413 cycles:u # 3.470 GHz (74.97%) + 1,636,155 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.97%) + 2,737,208,964 stalled-cycles-backend:u # 12.99% backend cycles idle (74.97%) + 78,062,863,314 instructions:u # 3.71 insn per cycle + # 0.04 stalled cycles per insn (74.98%) + 
6.076375494 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274868816393329E-004 +Relative difference = 1.7859056895059718e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.000853e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.014105e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.014105e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.347412 sec -INFO: No Floating Point Exceptions have been reported - 6,535,913,878 cycles # 2.780 GHz - 20,270,850,285 instructions # 3.10 insn per cycle - 2.351723425 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.073506e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.075556e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.075556e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.534629 sec +INFO: No Floating Point Exceptions have been reported + 5,291,209,026 cycles:u # 3.441 GHz (75.04%) + 208,389 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.03%) + 729,565,068 stalled-cycles-backend:u # 13.79% backend cycles idle (75.03%) + 20,300,438,713 instructions:u # 3.84 insn per cycle + # 0.04 stalled cycles per insn (75.03%) + 
1.575316034 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274847398845038E-004 +Relative difference = 3.924799464139408e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.599290e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.605892e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.605892e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.030594 sec -INFO: No Floating Point Exceptions have been reported - 2,836,963,276 cycles # 2.743 GHz - 7,065,994,832 instructions # 2.49 insn per cycle - 1.034860296 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.386743e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.396709e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.396709e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.703117 sec +INFO: No Floating Point Exceptions have been reported + 2,403,118,888 cycles:u # 3.444 GHz (74.96%) + 168,146 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.84%) + 213,761,143 stalled-cycles-backend:u # 8.90% backend cycles idle (74.89%) + 7,023,703,794 instructions:u # 2.92 insn per cycle + # 0.03 stalled cycles per insn (74.89%) + 
0.751204545 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271946993158581E-004 +Relative difference = 4.537125319208525e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.795295e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.803482e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.803482e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.918514 sec -INFO: No Floating Point Exceptions have been reported - 2,528,652,589 cycles # 2.743 GHz - 6,403,959,518 instructions # 2.53 insn per cycle - 0.922696206 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.410082e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.415209e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.415209e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.168400 sec -INFO: No Floating 
Point Exceptions have been reported - 2,072,435,771 cycles # 1.768 GHz - 3,304,546,208 instructions # 1.59 insn per cycle - 1.172720772 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index c66db7ae78..fb46fc2cdd 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,97 +1,77 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_18:07:41 -DATE: 2024-09-18_12:55:35 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.970193e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.498612e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.498612e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.479926 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,011,379,748 cycles # 2.884 GHz - 3,038,247,862 instructions # 1.51 insn per cycle - 0.753810180 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.934278e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.028896e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.028896e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.206052e-01 +- 3.252639e-01 ) GeV^-4 +TOTAL : 0.542710 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,531,785,718 cycles:u # 2.842 GHz (72.77%) + 3,064,573 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.47%) + 76,194,677 stalled-cycles-backend:u # 4.97% backend cycles idle (76.32%) + 1,782,357,687 instructions:u # 1.16 insn per cycle + # 0.04 stalled cycles per insn (76.30%) + 0.592592082 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.940879e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.083233e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.083233e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 -TOTAL : 1.970426 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,440,275,548 cycles # 2.913 GHz - 13,287,281,132 instructions # 2.06 insn per cycle - 2.267959957 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.923948e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.119756e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.119756e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.183967e+02 +- 1.165669e+02 ) GeV^-4 +TOTAL : 5.393986 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 17,913,453,356 cycles:u # 3.304 GHz (74.92%) + 30,388,586 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.06%) + 2,151,686,487 stalled-cycles-backend:u # 12.01% backend cycles idle (75.09%) + 14,392,276,960 instructions:u # 0.80 insn per cycle + # 0.15 stalled cycles per insn (74.98%) + 5.462019383 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -99,35 +79,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025600481842E-004 +Relative difference = 4.022433151864302e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.936854e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.937817e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.937817e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.475178 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 24,927,059,080 cycles # 2.940 GHz - 79,118,119,354 instructions # 3.17 insn per cycle - 8.479535627 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.670433e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.671622e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.671622e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.149800 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 21,343,747,009 cycles:u # 3.469 GHz (74.98%) + 18,868,954 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.03%) + 2,731,556,155 stalled-cycles-backend:u # 12.80% backend cycles idle (75.04%) + 78,007,569,798 instructions:u # 3.65 insn per cycle + # 0.04 stalled cycles per insn (75.04%) + 6.157496432 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 
2043) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -135,33 +116,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274868816393329E-004 +Relative difference = 1.7859056895059718e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.002962e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.015509e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.015509e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.349551 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,544,667,804 cycles # 2.781 GHz - 20,279,974,113 instructions # 3.10 insn per cycle - 2.353974711 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.073610e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.075603e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.075603e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.536410 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,300,583,999 cycles:u # 3.443 GHz (74.86%) + 468,795 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.07%) + 677,533,091 stalled-cycles-backend:u # 12.78% backend cycles idle (75.06%) + 
20,323,599,590 instructions:u # 3.83 insn per cycle + # 0.03 stalled cycles per insn (75.06%) + 1.544342366 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -169,33 +153,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274847398845038E-004 +Relative 
difference = 3.924799464139408e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.603853e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.610574e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.610574e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.030576 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,847,456,615 cycles # 2.753 GHz - 7,075,989,633 instructions # 2.49 insn per cycle - 1.035024707 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.383967e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.393992e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.393992e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.696778 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,387,404,039 cycles:u # 3.412 GHz (74.86%) + 231,790 
stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.86%) + 221,182,116 stalled-cycles-backend:u # 9.26% backend cycles idle (74.86%) + 7,014,055,965 instructions:u # 2.94 insn per cycle + # 0.03 stalled cycles per insn (74.86%) + 0.704363424 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -203,80 +190,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271946993158581E-004 +Relative difference = 4.537125319208525e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.785349e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.793696e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.793696e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.926691 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,540,934,134 cycles # 2.731 GHz - 6,413,438,200 instructions # 2.52 insn per cycle - 0.931148836 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.400821e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.405962e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.405962e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.179178 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,081,047,712 cycles # 1.760 GHz - 3,314,864,763 instructions # 1.59 insn per cycle - 1.183503546 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 3aa8ed158e..42df23ca66 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_18:12:31 -DATE: 2024-09-18_13:06:34 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.472678e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.513045e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.517154e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.159396e-01 +- 3.238803e-01 ) GeV^-4 -TOTAL : 0.480872 sec -INFO: No Floating Point Exceptions have been reported - 2,012,103,314 cycles # 2.880 GHz - 2,956,061,319 instructions # 1.47 insn per cycle - 0.756135044 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.900910e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.043919e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.044218e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.205840e-01 +- 3.252482e-01 ) GeV^-4 +TOTAL : 0.510575 sec +INFO: No Floating Point Exceptions have been reported + 1,486,205,403 cycles:u # 2.824 GHz (76.02%) + 2,946,687 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.79%) + 76,773,959 stalled-cycles-backend:u # 5.17% backend cycles idle (75.45%) + 1,853,207,947 instructions:u # 1.25 insn per cycle + # 0.04 stalled cycles per insn (75.11%) + 0.556110538 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.032542e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.093526e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.096446e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.887302 sec -INFO: No Floating Point Exceptions have been reported - 6,151,892,700 cycles # 2.911 GHz - 12,903,540,079 instructions # 2.10 insn per cycle - 2.177167582 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.084290e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.100734e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.100914e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.183835e+02 +- 1.165669e+02 ) GeV^-4 +TOTAL : 5.241201 sec +INFO: No Floating Point Exceptions have been reported + 17,557,459,247 cycles:u # 3.336 GHz (75.14%) + 19,889,271 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.08%) + 6,675,796 stalled-cycles-backend:u # 0.04% backend cycles idle (75.08%) + 13,522,683,695 instructions:u # 0.77 insn per cycle + # 0.00 stalled cycles per insn (74.96%) + 5.297307713 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025600481842E-004 +Relative difference = 4.022433151864302e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.942290e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.943248e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.943248e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.449460 sec -INFO: No Floating Point Exceptions have been reported - 24,927,677,850 cycles # 2.949 GHz - 79,113,674,015 instructions # 3.17 insn per cycle - 8.453509271 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.621454e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.622759e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.622759e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.262909 sec +INFO: No Floating Point Exceptions have been reported + 21,174,605,451 cycles:u # 3.380 GHz (74.97%) + 16,537,883 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.97%) + 2,687,542,026 stalled-cycles-backend:u # 12.69% backend cycles idle (74.97%) + 78,051,863,948 instructions:u # 3.69 insn per cycle + # 0.03 stalled cycles per insn (74.97%) + 6.266907844 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274868816393329E-004 +Relative difference = 1.7859056895059718e-08 OK (relative difference <= 5E-3) ========================================================================= 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.966325e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.979405e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.979405e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.360667 sec -INFO: No Floating Point Exceptions have been reported - 6,536,812,483 cycles # 2.766 GHz - 20,271,244,947 instructions # 3.10 insn per cycle - 2.364721005 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.063819e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.065973e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.065973e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.548640 sec +INFO: No Floating Point Exceptions have been reported + 5,278,056,370 cycles:u # 3.404 GHz (74.72%) + 682,875 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.79%) + 683,912,947 stalled-cycles-backend:u # 12.96% backend cycles idle (75.05%) + 20,308,587,082 instructions:u # 3.85 insn per cycle + # 0.03 stalled cycles per insn (75.24%) + 1.552537130 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274847398845038E-004 +Relative difference = 3.924799464139408e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.594039e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.600583e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.600583e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 1.034746 sec -INFO: No Floating Point Exceptions have been reported - 2,840,398,673 cycles # 2.736 GHz - 7,064,163,701 instructions # 2.49 insn per cycle - 1.038926233 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.374945e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.385404e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.385404e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.697351 sec +INFO: No Floating Point Exceptions have been reported + 2,403,349,263 cycles:u # 3.436 GHz (74.85%) + 1,567,151 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.84%) + 219,778,799 stalled-cycles-backend:u # 9.14% backend cycles idle (74.84%) + 7,044,721,523 instructions:u # 2.93 insn per cycle + # 0.03 stalled cycles per insn (74.84%) + 0.701360541 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271946993158581E-004 +Relative difference = 4.537125319208525e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.789304e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.797829e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.797829e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.922641 sec -INFO: No Floating Point Exceptions have been reported - 2,530,877,890 cycles # 2.733 GHz - 6,400,607,448 instructions # 2.53 insn per cycle - 0.926747674 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.398241e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.403280e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.403280e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.179849 sec -INFO: No 
Floating Point Exceptions have been reported - 2,072,557,863 cycles # 1.752 GHz - 3,302,114,927 instructions # 1.59 insn per cycle - 1.183970001 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 7797c46a19..1a7909a978 
100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,86 +1,69 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_18:10:42 -DATE: 2024-09-18_13:00:55 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.992477e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.494287e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.498231e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.481963 sec -INFO: No Floating Point Exceptions have been reported - 1,997,965,324 cycles # 2.853 GHz - 2,939,834,102 instructions # 1.47 insn per cycle - 0.757193064 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.942949e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.033746e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.034076e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.206052e-01 +- 3.252639e-01 ) GeV^-4 +TOTAL : 0.506941 sec +INFO: No Floating Point Exceptions have been reported + 1,520,307,429 cycles:u # 2.882 GHz (75.27%) + 3,258,324 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.51%) + 68,532,209 stalled-cycles-backend:u # 4.51% backend cycles idle (74.25%) + 1,858,236,746 instructions:u # 1.22 insn per cycle + # 0.04 stalled cycles per insn (73.66%) + 0.556529800 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.118190e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.192549e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.195361e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 -TOTAL : 1.905617 sec -INFO: No Floating Point Exceptions have been reported - 6,204,653,970 cycles # 2.904 GHz - 11,932,036,366 instructions # 1.92 insn per cycle - 2.194579719 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.914615e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.094958e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.095133e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.183967e+02 +- 1.165669e+02 ) GeV^-4 +TOTAL : 5.333640 sec +INFO: No Floating Point Exceptions have been reported + 18,043,629,136 cycles:u # 3.366 GHz (75.08%) + 30,079,050 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.09%) + 2,134,096,420 stalled-cycles-backend:u # 11.83% backend cycles idle (75.01%) + 14,503,297,107 instructions:u # 0.80 insn per cycle + # 0.15 stalled cycles per insn (75.03%) + 5.389006417 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -88,33 +71,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025600481842E-004 +Relative difference = 4.022433151864302e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.934899e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.935850e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.935850e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.481229 sec -INFO: No Floating Point Exceptions have been reported - 24,933,908,474 cycles # 2.939 GHz - 79,109,779,876 instructions # 3.17 insn per cycle - 8.485474778 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.658893e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.660119e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.660119e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.174952 sec +INFO: No Floating Point Exceptions have been reported + 21,311,188,928 cycles:u # 3.450 GHz (75.00%) + 17,843,226 stalled-cycles-frontend:u # 0.08% frontend cycles idle (75.00%) + 2,713,676,089 stalled-cycles-backend:u # 12.73% backend cycles idle (75.00%) + 78,004,437,887 instructions:u # 3.66 insn per cycle + # 0.03 stalled cycles per insn (75.00%) + 6.178866187 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -122,31 +106,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274868816393329E-004 +Relative difference = 1.7859056895059718e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 
--rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.954399e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.967143e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.967143e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.363281 sec -INFO: No Floating Point Exceptions have been reported - 6,535,222,026 cycles # 2.761 GHz - 20,271,091,445 instructions # 3.10 insn per cycle - 2.367564480 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.037398e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.039470e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.039470e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.587755 sec +INFO: No Floating Point Exceptions have been reported + 5,288,866,019 cycles:u # 3.327 GHz (74.84%) + 227,676 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.84%) + 668,093,552 stalled-cycles-backend:u # 12.63% backend cycles idle (74.84%) + 20,316,507,500 instructions:u # 3.84 insn per cycle + # 0.03 stalled cycles per insn (74.93%) + 1.591582006 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -154,31 +141,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274847398845038E-004 +Relative difference = 3.924799464139408e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 
--rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.592187e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.598658e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.598658e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.035113 sec -INFO: No Floating Point Exceptions have been reported - 2,837,322,925 cycles # 2.732 GHz - 7,065,851,947 instructions # 2.49 insn per cycle - 1.039614272 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.330008e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.340797e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.340797e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.710511 sec +INFO: No Floating Point Exceptions have been reported + 2,380,119,927 cycles:u # 3.341 GHz (75.30%) + 557,625 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.30%) + 217,294,961 stalled-cycles-backend:u # 9.13% backend cycles idle (75.30%) + 7,015,459,296 instructions:u # 2.95 insn per cycle + # 0.03 stalled cycles per insn (75.30%) + 0.714337492 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -186,76 +176,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271946993158581E-004 +Relative difference = 4.537125319208525e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 
--rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.786472e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.794657e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.794657e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.923192 sec -INFO: No Floating Point Exceptions have been reported - 2,528,197,649 cycles # 2.730 GHz - 6,403,497,083 instructions # 2.53 insn per cycle - 0.927414591 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.394144e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.399234e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.399234e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.181792 sec -INFO: No 
Floating Point Exceptions have been reported - 2,068,985,618 cycles # 1.745 GHz - 3,303,850,767 instructions # 1.60 insn per cycle - 1.186123644 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index 9b731718b7..d479d256f1 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_17:17:03 -DATE: 2024-09-18_12:17:58 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.454590e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.492804e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.497193e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.495704 sec -INFO: No Floating Point Exceptions have been reported - 2,032,995,153 cycles # 2.848 GHz - 2,991,224,667 instructions # 1.47 insn per cycle - 0.774166376 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.891386e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.024659e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.024962e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 +TOTAL : 0.491872 sec +INFO: No Floating Point Exceptions have been reported + 1,367,931,450 cycles:u # 2.716 GHz (76.22%) + 2,444,009 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.88%) + 7,290,356 stalled-cycles-backend:u # 0.53% backend cycles idle (75.72%) + 1,873,529,608 instructions:u # 1.37 insn per cycle + # 0.00 stalled cycles per insn (74.64%) + 0.548452272 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.094149e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.154905e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.157610e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.805319 sec -INFO: No Floating Point Exceptions have been reported - 5,914,324,613 cycles # 2.902 GHz - 11,873,756,543 instructions # 2.01 insn per cycle - 2.096430893 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.088094e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.102024e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.102211e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 +TOTAL : 3.951404 sec +INFO: No Floating Point Exceptions have been reported + 13,133,485,956 cycles:u # 3.324 GHz (75.01%) + 3,056,975 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.01%) + 6,407,169 stalled-cycles-backend:u # 0.05% backend cycles idle (74.85%) + 12,133,816,753 instructions:u # 0.92 insn per cycle + # 0.00 stalled cycles per insn (74.85%) + 4.011802643 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025600481842E-004 +Relative difference = 4.022433151864302e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.929536e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.930480e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.930480e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.504728 sec -INFO: No Floating Point Exceptions have been reported - 25,015,654,943 cycles # 2.941 GHz - 78,847,702,433 instructions # 3.15 insn per cycle - 8.508857223 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.709215e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.710598e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.710598e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.059935 sec +INFO: No Floating Point Exceptions have been reported + 21,003,004,134 cycles:u # 3.465 GHz (74.93%) + 1,153,390 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.95%) + 2,724,239,552 stalled-cycles-backend:u # 12.97% backend cycles idle (75.02%) + 77,982,313,980 instructions:u # 3.71 insn per cycle + # 0.03 stalled cycles per insn (75.06%) + 
6.066960240 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866250177339E-004 -Relative difference = 5.65798569465384e-08 +Avg ME (F77/C++) = 6.6274868874222764E-004 +Relative difference = 1.698648731198014e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.178831e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.192718e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.192718e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.289338 sec -INFO: No Floating Point Exceptions have been reported - 6,463,318,702 cycles # 2.819 GHz - 20,229,880,790 instructions # 3.13 insn per cycle - 2.293529801 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13491) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.081042e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.083133e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.083133e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.523801 sec +INFO: No Floating Point Exceptions have been reported + 5,298,511,047 cycles:u # 3.471 GHz (74.70%) + 212,156 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.85%) + 741,731,752 stalled-cycles-backend:u # 14.00% backend cycles idle (74.85%) + 20,314,215,061 instructions:u # 3.83 insn per cycle + # 0.04 stalled cycles per insn (74.88%) + 
1.530800363 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861448331612E-004 -Relative difference = 2.1853408865157068e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274847398845038E-004 +Relative difference = 3.924799464139408e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.520587e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.526569e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.526569e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.083432 sec -INFO: No Floating Point Exceptions have been reported - 2,984,403,957 cycles # 2.746 GHz - 7,207,167,499 instructions # 2.41 insn per cycle - 1.087697042 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12437) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.387867e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.398246e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.398246e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.708932 sec +INFO: No Floating Point Exceptions have been reported + 2,404,304,951 cycles:u # 3.447 GHz (74.71%) + 164,227 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.85%) + 257,148,789 stalled-cycles-backend:u # 10.70% backend cycles idle (74.85%) + 7,021,443,847 instructions:u # 2.92 insn per cycle + # 0.04 stalled cycles per insn (74.78%) + 
0.766054321 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10773) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271939668088170E-004 -Relative difference = 5.008331292535666e-09 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271946993158581E-004 +Relative difference = 4.537125319208525e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.733677e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.741677e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.741677e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.950819 sec -INFO: No Floating Point Exceptions have been reported - 2,611,989,316 cycles # 2.737 GHz - 6,545,448,351 instructions # 2.51 insn per cycle - 0.954971597 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11449) (512y: 27) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271939668088170E-004 -Relative difference = 5.008331292535666e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.366907e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.371833e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.371833e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.204851 sec -INFO: No Floating 
Point Exceptions have been reported - 2,138,789,905 cycles # 1.770 GHz - 3,461,611,954 instructions # 1.62 insn per cycle - 1.209183599 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3037) (512y: 25) (512z: 9677) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952032316561E-004 -Relative difference = 3.066631594207157e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 2cbba9e698..cd1148f688 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_17:59:30 -DATE: 2024-09-18_12:47:39 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = 
HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.579593e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.616784e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.620542e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.508462 sec -INFO: No Floating Point Exceptions have been reported - 2,050,083,222 cycles # 2.848 GHz - 2,995,129,166 instructions # 1.46 insn per cycle - 0.787145254 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.913343e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.049266e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.049574e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 +TOTAL : 0.488690 sec +INFO: No Floating Point Exceptions have been reported + 1,394,377,530 cycles:u # 2.786 GHz (75.85%) + 2,538,433 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.30%) + 5,232,172 stalled-cycles-backend:u # 0.38% backend cycles idle (75.35%) + 1,817,251,691 instructions:u # 1.30 insn per cycle + # 0.00 stalled cycles per insn (76.01%) + 0.543742736 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.605413e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.675177e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.678183e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.737190 sec -INFO: No Floating Point Exceptions have been reported - 5,761,752,713 cycles # 2.921 GHz - 12,131,218,179 instructions # 2.11 insn per cycle - 2.028782459 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.056819e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.070234e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.070408e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 +TOTAL : 4.103063 sec +INFO: No Floating Point Exceptions have been reported + 13,399,052,407 cycles:u # 3.351 GHz (75.07%) + 3,050,420 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.01%) + 7,952,892 stalled-cycles-backend:u # 0.06% backend cycles idle (75.00%) + 12,253,881,436 instructions:u # 0.91 insn per cycle + # 0.00 stalled cycles per insn (74.87%) + 4.159583942 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262669162351490E-004 -Relative difference = 2.8232862531213374e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025600481842E-004 +Relative difference = 4.022433151864302e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.602317e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.603102e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.603102e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 29.279245 sec -INFO: No Floating Point Exceptions have been reported - 85,920,999,170 cycles # 2.934 GHz - 135,650,935,446 instructions # 1.58 insn per cycle - 29.283501695 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15856) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.770941e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.771498e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.771498e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.204931e-01 +- 3.252405e-01 ) GeV^-4 +TOTAL : 28.425558 sec +INFO: No Floating Point Exceptions have been reported + 98,567,509,459 cycles:u # 3.467 GHz (74.99%) + 298,641,860 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.98%) + 5,269,027,296 stalled-cycles-backend:u # 5.35% backend cycles idle (75.00%) + 132,438,029,653 instructions:u # 1.34 insn per cycle + # 0.04 stalled cycles per insn (75.01%) + 
28.433002977 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:17007) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275349717465765E-004 -Relative difference = 4.26303654465793e-09 +Avg ME (F77/C++) = 6.6275346655336742E-004 +Relative difference = 5.0466172741879477e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.859267e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.871489e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.871489e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.395975 sec -INFO: No Floating Point Exceptions have been reported - 6,767,487,912 cycles # 2.821 GHz - 19,352,953,840 instructions # 2.86 insn per cycle - 2.400276342 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69577) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.145293e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.156991e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.156991e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 +TOTAL : 2.020277 sec +INFO: No Floating Point Exceptions have been reported + 7,031,832,622 cycles:u # 3.476 GHz (74.85%) + 4,625,174 stalled-cycles-frontend:u # 0.07% frontend cycles idle (75.03%) + 3,108,170,327 stalled-cycles-backend:u # 44.20% backend cycles idle (75.09%) + 19,163,359,990 instructions:u # 2.73 insn per cycle + # 0.16 stalled cycles per insn 
(75.09%) + 2.027504768 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69115) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274862748188362E-004 -Relative difference = 4.14665283800746e-08 +Avg ME (F77/C++) = 6.6274857190509046E-004 +Relative difference = 4.239150340994169e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.427993e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.433168e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.433168e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.153582 sec -INFO: No Floating Point Exceptions have been reported - 3,172,176,609 cycles # 2.741 GHz - 6,794,912,676 instructions # 2.14 insn per cycle - 1.157865028 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49034) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.464625e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.468364e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.468364e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 +TOTAL : 1.126513 sec +INFO: No Floating Point Exceptions have been reported + 3,904,723,679 cycles:u # 3.458 GHz (74.88%) + 357,168 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.18%) + 2,244,634,890 stalled-cycles-backend:u # 57.49% backend cycles idle (75.21%) + 6,704,968,345 instructions:u # 1.72 insn per cycle + # 0.33 stalled cycles per insn (75.21%) + 
1.133564716 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:48510) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731568543797E-004 -Relative difference = 2.3668012430631962e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627274e-04 +Avg ME (F77/C++) = 6.6272735727803539E-004 +Relative difference = 6.446385744398604e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.725737e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.733579e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.733579e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 0.955483 sec -INFO: No Floating Point Exceptions have been reported - 2,630,257,808 cycles # 2.742 GHz - 5,970,030,267 instructions # 2.27 insn per cycle - 0.959792623 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42602) (512y: 11) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731568543797E-004 -Relative difference = 2.3668012430631962e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.398705e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.403700e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.403700e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.177833 sec -INFO: No Floating 
Point Exceptions have been reported - 2,074,489,030 cycles # 1.756 GHz - 3,495,482,745 instructions # 1.68 insn per cycle - 1.182176144 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5208) (512y: 3) (512z:44858) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627275e-04 -Avg ME (F77/C++) = 6.6272750237027223E-004 -Relative difference = 3.5765412974815996e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index 307c9cbde7..c0a0efd470 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_18:00:14 -DATE: 2024-09-18_12:48:29 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = 
HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.556326e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.594247e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.598112e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.495550 sec -INFO: No Floating Point Exceptions have been reported - 2,046,506,588 cycles # 2.866 GHz - 3,036,453,126 instructions # 1.48 insn per cycle - 0.773976715 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.889528e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.021901e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.022220e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 +TOTAL : 0.487499 sec +INFO: No Floating Point Exceptions have been reported + 1,399,710,931 cycles:u # 2.802 GHz (74.84%) + 2,534,919 stalled-cycles-frontend:u # 0.18% frontend cycles idle (73.85%) + 5,613,558 stalled-cycles-backend:u # 0.40% backend cycles idle (75.39%) + 1,783,832,893 instructions:u # 1.27 insn per cycle + # 0.00 stalled cycles per insn (76.12%) + 0.541982713 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.676205e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.747820e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.750770e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.731569 sec -INFO: No Floating Point Exceptions have been reported - 5,750,101,661 cycles # 2.911 GHz - 12,015,194,090 instructions # 2.09 insn per cycle - 2.032327922 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.122721e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.137313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.137491e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 +TOTAL : 3.896901 sec +INFO: No Floating Point Exceptions have been reported + 13,168,180,477 cycles:u # 3.362 GHz (74.89%) + 2,694,573 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.15%) + 6,900,160 stalled-cycles-backend:u # 0.05% backend cycles idle (75.20%) + 11,988,237,942 instructions:u # 0.91 insn per cycle + # 0.00 stalled cycles per insn (75.28%) + 3.955917360 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262669162351490E-004 -Relative difference = 2.8232862531213374e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025600481842E-004 +Relative difference = 4.022433151864302e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.582687e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.583472e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.583472e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 29.381578 sec -INFO: No Floating Point Exceptions have been reported - 86,090,574,106 cycles # 2.930 GHz - 135,364,281,032 instructions # 1.57 insn per cycle - 29.385785407 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15471) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.923477e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.924063e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.924063e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.204931e-01 +- 3.252405e-01 ) GeV^-4 +TOTAL : 27.692633 sec +INFO: No Floating Point Exceptions have been reported + 96,250,613,811 cycles:u # 3.476 GHz (74.99%) + 107,567,410 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.99%) + 6,145,471,263 stalled-cycles-backend:u # 6.38% backend cycles idle (74.99%) + 131,678,393,429 instructions:u # 1.37 insn per cycle + # 0.05 stalled cycles per insn (75.00%) + 
27.700069041 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:16664) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275349662128086E-004 -Relative difference = 5.098002770919431e-09 +Avg ME (F77/C++) = 6.6275348053303901E-004 +Relative difference = 2.9372852846917734e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.781191e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.793019e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.793019e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.423420 sec -INFO: No Floating Point Exceptions have been reported - 6,852,713,563 cycles # 2.824 GHz - 19,471,819,479 instructions # 2.84 insn per cycle - 2.427762808 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69876) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.780732e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.791659e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.791659e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 +TOTAL : 2.114411 sec +INFO: No Floating Point Exceptions have been reported + 7,188,013,987 cycles:u # 3.395 GHz (74.96%) + 2,072,727 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.06%) + 3,171,264,483 stalled-cycles-backend:u # 44.12% backend cycles idle (75.06%) + 19,159,161,719 instructions:u # 2.67 insn per cycle + # 0.17 stalled cycles per insn 
(75.06%) + 2.121741568 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68769) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274862799683282E-004 -Relative difference = 4.2243518621014775e-08 +Avg ME (F77/C++) = 6.6274857155746575E-004 +Relative difference = 4.291602312495571e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.462291e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.467817e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.467817e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.126460 sec -INFO: No Floating Point Exceptions have been reported - 3,104,466,483 cycles # 2.747 GHz - 6,715,454,919 instructions # 2.16 insn per cycle - 1.130606631 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47692) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.472732e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.476503e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.476503e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 +TOTAL : 1.120177 sec +INFO: No Floating Point Exceptions have been reported + 3,830,725,464 cycles:u # 3.412 GHz (74.87%) + 182,458 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.07%) + 2,163,439,633 stalled-cycles-backend:u # 56.48% backend cycles idle (75.07%) + 6,640,488,588 instructions:u # 1.73 insn per cycle + # 0.33 stalled cycles per insn (75.07%) + 
1.127441049 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47334) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731623419345E-004 -Relative difference = 2.449603850635964e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627274e-04 +Avg ME (F77/C++) = 6.6272735712090414E-004 +Relative difference = 6.470095531024898e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.731919e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.740037e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.740037e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 0.951895 sec -INFO: No Floating Point Exceptions have been reported - 2,625,337,295 cycles # 2.748 GHz - 5,966,178,470 instructions # 2.27 insn per cycle - 0.956115789 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41858) (512y: 13) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731623419345E-004 -Relative difference = 2.449603850635964e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.400560e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.405624e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.405624e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.176100 sec -INFO: No Floating 
Point Exceptions have been reported - 2,074,048,907 cycles # 1.758 GHz - 3,487,720,369 instructions # 1.68 insn per cycle - 1.180409639 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4171) (512y: 4) (512z:44494) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627275e-04 -Avg ME (F77/C++) = 6.6272750247886592E-004 -Relative difference = 3.740400032174438e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 9378c125b2..bb3fc679f3 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_17:17:21 -DATE: 2024-09-18_12:18:25 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.318122e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.344688e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.346795e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.537854 sec -INFO: No Floating Point Exceptions have been reported - 2,221,101,860 cycles # 2.870 GHz - 3,456,789,338 instructions # 1.56 insn per cycle - 0.830636964 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.203568e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.256693e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.256851e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 +TOTAL : 0.559094 sec +INFO: No Floating Point Exceptions have been reported + 1,652,028,100 cycles:u # 2.893 GHz (74.78%) + 2,492,328 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.84%) + 5,430,257 stalled-cycles-backend:u # 0.33% backend cycles idle (75.24%) + 2,047,800,783 instructions:u # 1.24 insn per cycle + # 0.00 stalled cycles per insn (74.71%) + 0.612737111 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.135476e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.165199e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.166400e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.042010 sec -INFO: No Floating Point Exceptions have been reported - 9,635,962,932 cycles # 2.918 GHz - 21,731,646,939 instructions # 2.26 insn per cycle - 3.358385171 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.679174e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.684528e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.684629e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.702058 sec +INFO: No Floating Point Exceptions have been reported + 22,612,281,809 cycles:u # 3.371 GHz (74.82%) + 3,413,939 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.91%) + 7,403,546 stalled-cycles-backend:u # 0.03% backend cycles idle (75.07%) + 20,078,972,435 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (75.17%) + 6.769412191 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.865433e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.866327e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.866327e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.798260 sec -INFO: No Floating Point Exceptions have been reported - 25,923,427,719 cycles # 2.945 GHz - 79,426,669,152 instructions # 3.06 insn per cycle - 8.802604907 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4775) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.565308e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.566517e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.566517e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.401537 sec +INFO: No Floating Point Exceptions have been reported + 21,503,404,728 cycles:u # 3.358 GHz (75.02%) + 817,007 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.02%) + 2,712,543,238 stalled-cycles-backend:u # 12.61% backend cycles idle (75.02%) + 78,876,861,074 instructions:u # 3.67 insn per cycle + # 0.03 stalled cycles per insn (75.02%) + 6.408587192 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4817) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.509753e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.512944e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.512944e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.679543 sec -INFO: No Floating Point Exceptions have been reported - 12,835,987,651 cycles # 2.741 GHz - 38,823,362,502 instructions # 3.02 insn per cycle - 4.683930656 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13173) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.463692e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.468812e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.468812e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.010165 sec +INFO: No Floating Point Exceptions have been reported + 10,388,779,314 cycles:u # 3.448 GHz (74.90%) + 3,190,734 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.04%) + 1,318,490,699 stalled-cycles-backend:u # 12.69% backend cycles idle (75.04%) + 38,688,631,929 instructions:u # 3.72 insn per cycle + # 0.03 stalled cycles per insn (75.04%) + 3.017230551 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12020) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.042437e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.059866e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.059866e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.045674 sec -INFO: No Floating Point Exceptions have been reported - 5,599,505,022 cycles # 2.733 GHz - 13,616,194,882 instructions # 2.43 insn per cycle - 2.050016410 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11427) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.216759e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.219259e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.219259e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.356179 sec +INFO: No Floating Point Exceptions have been reported + 4,702,260,606 cycles:u # 3.460 GHz (74.87%) + 623,171 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.70%) + 440,406,312 stalled-cycles-backend:u # 9.37% backend cycles idle (74.59%) + 13,620,136,115 instructions:u # 2.90 insn per cycle + # 0.03 stalled cycles per insn (74.88%) + 1.363616004 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10261) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 +Avg ME (F77/C++) = 6.6266730409276836E-004 +Relative difference = 2.9563428359824236e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.300992e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.323362e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.323362e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.769579 sec -INFO: No Floating Point Exceptions have been reported - 4,864,538,423 cycles # 2.743 GHz - 12,294,521,282 instructions # 2.53 insn per cycle - 1.774039102 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10331) (512y: 80) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.972443e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.984642e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.984642e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.358911 sec -INFO: No Floating 
Point Exceptions have been reported - 4,168,866,472 cycles # 1.765 GHz - 6,393,098,618 instructions # 1.53 insn per cycle - 2.363390601 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1983) (512y: 92) (512z: 9360) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index 032ee51884..56d4d37ac1 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-09-18_17:17:44 -DATE: 2024-09-18_12:18:59 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.323949e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.349755e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.352036e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.534628 sec -INFO: No Floating Point Exceptions have been reported - 2,204,767,059 cycles # 2.871 GHz - 3,455,052,131 instructions # 1.57 insn per cycle - 0.826431777 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.218240e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.272928e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273080e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 +TOTAL : 0.542918 sec +INFO: No Floating Point Exceptions have been reported + 1,567,973,610 cycles:u # 2.821 GHz (74.81%) + 2,593,051 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.60%) + 6,325,504 stalled-cycles-backend:u # 0.40% backend cycles idle (75.57%) + 2,043,128,897 instructions:u # 1.30 insn per cycle + # 0.00 stalled cycles per insn (75.02%) + 0.593353614 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.145238e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.175049e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.176235e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.038517 sec -INFO: No Floating Point Exceptions have been reported - 9,654,182,964 cycles # 2.928 GHz - 20,172,707,879 instructions # 2.09 insn per cycle - 3.353606693 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.691717e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.697156e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.697257e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.465394 sec +INFO: No Floating Point Exceptions have been reported + 21,957,831,405 cycles:u # 3.378 GHz (75.02%) + 3,245,067 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.09%) + 7,050,657 stalled-cycles-backend:u # 0.03% backend cycles idle (75.16%) + 19,596,656,251 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (75.14%) + 6.546179574 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.861444e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.862342e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.862342e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.816790 sec -INFO: No Floating Point Exceptions have been reported - 25,987,801,849 cycles # 2.947 GHz - 79,452,087,213 instructions # 3.06 insn per cycle - 8.821027518 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4431) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.642658e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.643866e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.643866e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.214109 sec +INFO: No Floating Point Exceptions have been reported + 21,594,617,303 cycles:u # 3.474 GHz (74.92%) + 773,227 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.99%) + 2,926,370,471 stalled-cycles-backend:u # 13.55% backend cycles idle (75.04%) + 78,771,997,564 instructions:u # 3.65 insn per cycle + # 0.04 stalled cycles per insn (75.04%) + 6.221411052 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4763) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.513306e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.516455e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.516455e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.674356 sec -INFO: No Floating Point Exceptions have been reported - 12,813,296,665 cycles # 2.739 GHz - 38,778,823,155 instructions # 3.03 insn per cycle - 4.678665662 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12935) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.498994e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.504608e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.504608e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.990999 sec +INFO: No Floating Point Exceptions have been reported + 10,340,645,927 cycles:u # 3.454 GHz (74.89%) + 3,779,975 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.88%) + 1,345,814,664 stalled-cycles-backend:u # 13.01% backend cycles idle (74.90%) + 38,784,635,393 instructions:u # 3.75 insn per cycle + # 0.03 stalled cycles per insn (75.01%) + 2.997927612 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:11990) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.042911e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.058963e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.058963e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.045213 sec -INFO: No Floating Point Exceptions have been reported - 5,589,546,199 cycles # 2.728 GHz - 13,732,854,665 instructions # 2.46 insn per cycle - 2.049788655 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11510) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.208487e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.210948e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.210948e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.365089 sec +INFO: No Floating Point Exceptions have been reported + 4,698,068,104 cycles:u # 3.435 GHz (74.90%) + 970,217 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.86%) + 438,810,652 stalled-cycles-backend:u # 9.34% backend cycles idle (74.85%) + 13,603,628,607 instructions:u # 2.90 insn per cycle + # 0.03 stalled cycles per insn (74.85%) + 1.371980486 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10235) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 +Avg ME (F77/C++) = 6.6266730409276836E-004 +Relative difference = 2.9563428359824236e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.106583e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.127720e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.127720e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.807350 sec -INFO: No Floating Point Exceptions have been reported - 4,955,573,408 cycles # 2.736 GHz - 12,423,027,135 instructions # 2.51 insn per cycle - 1.811880023 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10322) (512y: 240) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.875797e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.888202e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.888202e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.391557 sec -INFO: No Floating 
Point Exceptions have been reported - 4,183,217,410 cycles # 1.747 GHz - 6,495,987,121 instructions # 1.55 insn per cycle - 2.396041838 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1806) (512y: 190) (512z: 9358) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 7ab313debd..b360bc4479 100644 --- 
a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-18_12:20:52 +DATE: 2024-09-18_17:18:23 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.053996e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.054389e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.054544e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.439529 sec -INFO: No Floating Point Exceptions have been reported - 
8,096,284,346 cycles # 2.927 GHz - 17,063,420,790 instructions # 2.11 insn per cycle - 2.826206150 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.238045e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.240055e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.240313e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.011196 sec -INFO: No Floating Point Exceptions have been reported - 12,704,613,289 cycles # 2.925 GHz - 30,115,204,727 instructions # 2.37 insn per cycle - 4.397191434 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.572616e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.572823e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.572823e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.975845 sec -INFO: No Floating Point Exceptions have been reported - 19,035,417,803 cycles # 2.728 GHz - 53,904,235,908 instructions # 2.83 insn per cycle - 6.980238056 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.186105e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.186142e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.186142e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.469648 sec +INFO: No Floating Point Exceptions have been reported + 15,438,712,619 cycles:u # 3.463 GHz (74.97%) + 7,657,888 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.05%) + 1,562,103,760 stalled-cycles-backend:u # 10.12% backend cycles idle (75.06%) + 53,525,096,792 instructions:u # 3.47 insn per cycle + # 0.03 stalled cycles per insn (75.06%) + 4.476405884 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44571) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.590030e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.590126e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.590126e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.323749 sec -INFO: No Floating Point Exceptions have been reported - 9,780,563,101 cycles # 2.940 GHz - 27,151,089,688 instructions # 2.78 insn per cycle - 3.328023666 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.331650e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.331789e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.331789e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.268575 sec +INFO: No Floating Point Exceptions have been reported + 7,859,912,105 cycles:u # 3.461 GHz (74.99%) + 930,734 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) + 761,875,838 stalled-cycles-backend:u # 9.69% backend cycles idle (74.99%) + 27,073,752,041 instructions:u # 3.44 insn per cycle + # 0.03 stalled cycles per insn (74.99%) + 2.275150161 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.385331e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.385742e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.385742e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.561733 sec -INFO: No Floating Point Exceptions have been reported - 4,266,182,969 cycles # 2.725 GHz - 9,590,975,871 instructions # 2.25 insn per cycle - 1.566018474 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.142642e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.143137e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.143137e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.028794 sec +INFO: No Floating Point Exceptions have been reported + 3,559,164,828 cycles:u # 3.451 GHz (74.59%) + 695,562 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.93%) + 279,310,261 stalled-cycles-backend:u # 7.85% backend cycles idle (75.19%) + 9,564,381,285 instructions:u # 2.69 insn per cycle + # 0.03 stalled cycles per insn (75.19%) + 1.035696149 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83781) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 +Avg ME (F77/C++) = 9.8722595285459444E-003 +Relative difference = 3.5163711246052657e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = 
( 3.892057e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.892635e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.892635e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.358913 sec -INFO: No Floating Point Exceptions have been reported - 3,729,263,843 cycles # 2.737 GHz - 8,515,569,817 instructions # 2.28 insn per cycle - 1.363199183 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 90) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.395803e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.396338e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.396338e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.556955 sec -INFO: No Floating Point Exceptions have been reported - 2,698,860,839 cycles # 1.729 GHz - 4,282,343,065 instructions # 1.59 insn per cycle - 1.561500058 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2856) (512y: 102) (512z:79114) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 5983376983..5ed64682b9 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,96 +19,35 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-18_12:56:01 +DATE: 2024-09-18_18:08:00 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.052616e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.054381e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.054381e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.389054 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 7,904,157,038 cycles # 2.912 GHz - 16,771,352,323 instructions # 2.12 insn per cycle - 2.770325050 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.237194e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.272545e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.272545e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.987988 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 12,604,681,487 cycles # 2.919 GHz - 28,965,849,382 instructions # 2.30 insn per cycle - 4.373962640 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.613542e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.613748e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.613748e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.936083 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 18,900,515,421 cycles # 2.724 GHz - 53,905,451,035 instructions # 2.85 insn per cycle - 6.940621858 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.177999e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.178036e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.178036e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.485152 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 15,438,451,647 cycles:u # 3.440 GHz (74.90%) + 8,146,011 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.98%) + 1,535,013,765 stalled-cycles-backend:u # 9.94% backend cycles idle (75.05%) + 53,481,620,838 instructions:u # 3.46 insn per cycle + # 0.03 stalled cycles per insn (75.05%) + 4.492997642 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44571) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -116,33 +55,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.538785e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.538876e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.538876e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.433615 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,052,781,401 cycles # 2.925 GHz - 27,153,872,228 instructions # 2.70 insn per cycle - 3.438126502 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.268926e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.269053e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.269053e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.327192 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 8,142,656,834 cycles:u # 3.495 GHz (74.89%) + 1,696,473 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.94%) + 871,992,317 stalled-cycles-backend:u # 10.71% backend cycles idle (74.94%) + 27,012,965,155 instructions:u # 3.32 insn per cycle + # 0.03 stalled cycles per insn (74.94%) + 2.334766690 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -150,33 +92,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.384986e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.385397e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.385397e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.561732 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,257,385,748 cycles # 2.719 GHz - 9,593,157,745 instructions # 2.25 insn per cycle - 1.566325188 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.154554e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.155050e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.155050e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.027621 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,597,273,520 cycles:u # 3.491 GHz (74.96%) + 2,850,406 stalled-cycles-frontend:u # 0.08% frontend cycles idle (75.16%) + 326,699,824 stalled-cycles-backend:u # 9.08% backend cycles idle (75.16%) + 9,558,618,447 instructions:u # 2.66 insn per cycle + # 0.03 stalled cycles per insn (75.16%) + 1.035578637 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2:83781) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -184,80 +129,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 +Avg ME (F77/C++) = 9.8722595285459444E-003 +Relative difference = 3.5163711246052657e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.887075e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.887680e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.887680e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.360664 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,718,394,007 cycles # 2.725 GHz - 8,517,746,108 instructions # 2.29 insn per cycle - 1.365273931 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 90) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.422958e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.423581e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.423581e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.545411 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,703,115,511 cycles # 1.745 GHz - 4,284,711,505 instructions # 1.59 insn per cycle - 1.550234745 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2856) (512y: 102) (512z:79114) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 6972883511..1fdd2acac3 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-18_12:22:20 +DATE: 2024-09-18_17:19:01 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.054893e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.055305e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.055482e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.442249 sec -INFO: No Floating Point Exceptions have been reported - 8,106,561,725 cycles # 2.931 GHz - 17,204,264,784 instructions # 2.12 insn per cycle - 2.825101828 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.195814e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.197984e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.198227e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.015071 sec -INFO: No Floating Point Exceptions have been reported - 12,724,131,626 cycles # 2.928 GHz - 29,969,146,046 instructions # 2.36 insn per cycle - 4.400441096 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.111535e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.111769e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.111769e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.512090 sec -INFO: No Floating Point Exceptions have been reported - 18,865,192,455 cycles # 2.896 GHz - 53,932,477,912 instructions # 2.86 insn per cycle - 6.516216407 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32022) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.179822e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.179860e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.179860e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.476637 sec +INFO: No Floating Point Exceptions have been reported + 15,420,433,610 cycles:u # 3.443 GHz (75.00%) + 3,322,675 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.00%) + 1,629,605,946 stalled-cycles-backend:u # 10.57% backend cycles idle (75.00%) + 53,478,329,026 instructions:u # 3.47 insn per cycle + # 0.03 stalled cycles per insn (75.00%) + 4.483381301 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44484) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.566187e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.566277e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.566277e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.374379 sec -INFO: No Floating Point Exceptions have been reported - 9,914,343,626 cycles # 2.935 GHz - 27,131,823,716 instructions # 2.74 insn per cycle - 3.378885579 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96368) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.327075e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.327244e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.327244e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.315391 sec +INFO: No Floating Point Exceptions have been reported + 8,010,165,479 cycles:u # 3.455 GHz (74.85%) + 2,043,919 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.82%) + 796,468,646 stalled-cycles-backend:u # 9.94% backend cycles idle (74.89%) + 27,088,519,124 instructions:u # 3.38 insn per cycle + # 0.03 stalled cycles per insn (75.06%) + 2.323093595 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95581) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.354421e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.354826e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.354826e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.575572 sec -INFO: No Floating Point Exceptions have been reported - 4,301,534,798 cycles # 2.724 GHz - 9,586,207,937 instructions # 2.23 insn per cycle - 1.579825913 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84968) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.121290e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.121789e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.121789e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.032658 sec +INFO: No Floating Point Exceptions have been reported + 3,553,389,334 cycles:u # 3.432 GHz (74.53%) + 1,260,522 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.80%) + 317,263,372 stalled-cycles-backend:u # 8.93% backend cycles idle (75.17%) + 9,558,578,083 instructions:u # 2.69 insn per cycle + # 0.03 stalled cycles per insn (75.28%) + 1.039354410 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83752) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 +Avg ME (F77/C++) = 9.8722595285459444E-003 +Relative difference = 3.5163711246052657e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = 
( 3.882229e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.882764e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.882764e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.361976 sec -INFO: No Floating Point Exceptions have been reported - 3,732,974,645 cycles # 2.734 GHz - 8,507,919,232 instructions # 2.28 insn per cycle - 1.366219448 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80632) (512y: 240) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.421560e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.422069e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.422069e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.546646 sec -INFO: No Floating Point Exceptions have been reported - 2,700,867,753 cycles # 1.742 GHz - 4,281,876,861 instructions # 1.59 insn per cycle - 1.551074701 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2693) (512y: 184) (512z:79098) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 41f4336bf3..076451c385 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-18_12:23:48 +DATE: 2024-09-18_17:19:39 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.204897e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.205686e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.206019e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.744512 sec -INFO: No Floating Point Exceptions have been reported - 5,890,882,031 cycles # 2.919 GHz - 11,806,932,962 instructions # 2.00 insn per cycle - 2.074529782 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.136881e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.137530e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.137618e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 -TOTAL : 2.049597 sec -INFO: No Floating Point Exceptions have been reported - 6,759,095,385 cycles # 2.923 GHz - 14,845,205,038 instructions # 2.20 insn per cycle - 2.369358973 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849635e-03 -Avg ME (F77/GPU) = 9.8712451931260159E-003 -Relative difference = 0.0021940095370046923 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.543544e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.543805e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.543805e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.183077 sec -INFO: No Floating Point Exceptions have been reported - 18,161,151,116 cycles # 2.936 GHz - 53,910,939,698 instructions # 2.97 insn per cycle - 6.187519652 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.080859e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.080881e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.080881e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 +TOTAL : 4.889914 sec +INFO: No Floating Point Exceptions have been reported + 16,954,036,007 cycles:u # 3.465 GHz (75.02%) + 102,686,449 stalled-cycles-frontend:u # 0.61% frontend cycles idle (74.99%) + 1,739,144,010 stalled-cycles-backend:u # 10.26% backend cycles idle (74.98%) + 54,211,433,935 instructions:u # 3.20 insn per cycle + # 0.03 stalled cycles per insn (74.98%) + 4.897013982 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:33073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087551509E-003 -Relative difference = 2.119780432912131e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855168e-03 +Avg ME (F77/C++) = 9.8551676614203575E-003 +Relative difference = 3.4355542366580335e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.361492e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.361888e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.361888e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.573076 sec -INFO: No Floating Point Exceptions have been reported - 4,616,676,545 cycles # 2.928 GHz - 13,807,548,367 instructions # 2.99 insn per cycle - 1.577368513 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.827034e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.827460e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.827460e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 +TOTAL : 1.096823 sec +INFO: No Floating Point Exceptions have been reported + 3,748,016,979 cycles:u # 3.409 GHz (74.60%) + 686,539 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.94%) + 358,581,520 stalled-cycles-backend:u # 9.57% backend cycles idle (75.25%) + 13,752,008,336 instructions:u # 3.67 insn per cycle + # 0.03 stalled cycles per insn (75.27%) + 1.126413502 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95933) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896367235E-003 -Relative difference = 3.1515505172940424e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855164e-03 +Avg ME (F77/C++) = 9.8551639361110794E-003 +Relative difference = 6.48278610035626e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.784398e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.786227e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.786227e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.780447 sec -INFO: No Floating Point Exceptions have been reported - 2,130,555,516 cycles # 2.717 GHz - 4,837,275,089 instructions # 2.27 insn per cycle - 0.784743576 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.978506e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.980069e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.980069e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 +TOTAL : 0.531646 sec +INFO: No Floating Point Exceptions have been reported + 1,793,798,155 cycles:u # 3.357 GHz (74.56%) + 990,764 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.56%) + 176,945,664 stalled-cycles-backend:u # 9.86% backend cycles idle (74.33%) + 4,825,441,710 instructions:u # 2.69 insn per cycle + # 0.04 stalled cycles per insn (75.08%) + 0.538766048 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84347) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.836478e-03 +Avg ME (F77/C++) = 9.8364784946823516E-003 +Relative difference = 5.0290597139820844e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.634242e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.636553e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.636553e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.693797 sec -INFO: No Floating Point Exceptions have been reported - 1,903,490,036 cycles # 2.729 GHz - 4,291,225,209 instructions # 2.25 insn per cycle - 0.698112096 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81183) (512y: 45) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.885404e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.887629e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.887629e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.769903 sec -INFO: No Floating Point Exceptions have been reported - 1,354,371,935 cycles # 1.750 GHz - 2,162,822,545 instructions # 1.60 insn per cycle - 0.774469858 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3481) (512y: 45) (512z:79330) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982676284E-003 -Relative difference = 2.004124217057488e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 8d8b09449b..306fb8fe41 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,96 +19,35 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-18_12:57:30 +DATE: 2024-09-18_18:08:39 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.296128e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.300632e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.300632e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187093e-05 +- 9.825663e-06 ) GeV^-6 -TOTAL : 1.691925 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,650,162,983 cycles # 2.892 GHz - 11,596,549,862 instructions # 2.05 insn per cycle - 2.010258263 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.106225e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.117844e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.117844e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856440e-04 +- 8.331091e-05 ) GeV^-6 -TOTAL : 2.039868 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,704,150,880 cycles # 2.913 GHz - 14,933,981,007 instructions # 2.23 insn per cycle - 2.357689511 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849635e-03 -Avg ME (F77/GPU) = 9.8712451931260159E-003 -Relative difference = 0.0021940095370046923 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.476123e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.476381e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.476381e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.231080 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 18,168,605,946 cycles # 2.914 GHz - 53,913,151,543 instructions # 2.97 insn per cycle - 6.235604617 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.089078e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.089099e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.089099e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 +TOTAL : 4.849175 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 16,844,001,251 cycles:u # 3.472 GHz (75.00%) + 101,836,746 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.95%) + 1,763,250,726 stalled-cycles-backend:u # 10.47% backend cycles idle (74.94%) + 54,132,335,107 instructions:u # 3.21 insn per cycle + # 0.03 stalled cycles per insn (74.94%) + 4.973728962 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:33073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -116,33 +55,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087551509E-003 -Relative difference = 2.119780432912131e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855168e-03 +Avg ME (F77/C++) = 9.8551676614203575E-003 +Relative difference = 3.4355542366580335e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.367327e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.367745e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.367745e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.570359 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,609,204,013 cycles # 2.928 GHz - 13,810,618,137 instructions # 3.00 insn per cycle - 1.574904752 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.855881e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.856328e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.856328e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 +TOTAL : 1.089527 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,771,188,595 cycles:u # 3.452 GHz (75.05%) + 597,687 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.11%) + 332,206,250 stalled-cycles-backend:u # 8.81% backend cycles idle (75.11%) + 13,755,411,669 instructions:u # 3.65 insn per cycle + # 0.02 stalled cycles per insn (75.11%) + 1.097297671 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:95933) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -150,33 +92,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896367235E-003 -Relative difference = 3.1515505172940424e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855164e-03 +Avg ME (F77/C++) = 9.8551639361110794E-003 +Relative difference = 6.48278610035626e-09 OK (relative difference <= 5E-3) ========================================================================= 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.813057e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.814753e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.814753e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.777334 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,130,492,369 cycles # 2.727 GHz - 4,838,939,909 instructions # 2.27 insn per cycle - 0.781848874 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.029580e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.029744e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.029744e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 +TOTAL : 0.515291 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,780,186,787 cycles:u # 3.435 GHz (75.01%) + 322,585 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.31%) + 157,439,754 stalled-cycles-backend:u # 8.84% backend cycles idle (75.31%) 
+ 4,809,438,119 instructions:u # 2.70 insn per cycle + # 0.03 stalled cycles per insn (75.31%) + 0.523072659 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84347) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -184,80 +129,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.836478e-03 +Avg ME (F77/C++) = 
9.8364784946823516E-003 +Relative difference = 5.0290597139820844e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.672152e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.674283e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.674283e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.690681 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,888,040,180 cycles # 2.718 GHz - 4,293,435,273 instructions # 2.27 insn per cycle - 0.695178892 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81183) (512y: 45) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.827455e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.829435e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.829435e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.776050 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,356,992,115 cycles # 1.740 GHz - 2,165,171,343 instructions # 1.60 insn per cycle - 0.780688696 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3481) (512y: 45) (512z:79330) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982676284E-003 -Relative difference = 2.004124217057488e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 43e4fd4779..d55898c9ff 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-18_12:24:51 +DATE: 2024-09-18_17:20:11 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.195253e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.195989e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.196280e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.748628 sec -INFO: No Floating Point Exceptions have been reported - 5,866,556,695 cycles # 2.917 GHz - 12,565,857,650 instructions # 2.14 insn per cycle - 2.067294353 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.121566e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.122211e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.122327e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 -TOTAL : 2.053021 sec -INFO: No Floating Point Exceptions have been reported - 6,778,897,896 cycles # 2.924 GHz - 14,985,250,436 instructions # 2.21 insn per cycle - 2.374125707 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849635e-03 -Avg ME (F77/GPU) = 9.8712451931260107E-003 -Relative difference = 0.0021940095370041636 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.587070e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.587332e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.587332e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.151381 sec -INFO: No Floating Point Exceptions have been reported - 18,055,403,744 cycles # 2.934 GHz - 53,896,033,902 instructions # 2.99 insn per cycle - 6.155606485 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.084126e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.084147e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.084147e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 +TOTAL : 4.875160 sec +INFO: No Floating Point Exceptions have been reported + 16,871,163,360 cycles:u # 3.459 GHz (74.93%) + 104,611,881 stalled-cycles-frontend:u # 0.62% frontend cycles idle (74.91%) + 1,736,496,978 stalled-cycles-backend:u # 10.29% backend cycles idle (74.94%) + 54,185,128,945 instructions:u # 3.21 insn per cycle + # 0.03 stalled cycles per insn (75.03%) + 4.882221401 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:33154) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087572898E-003 -Relative difference = 2.1198021522715588e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855168e-03 +Avg ME (F77/C++) = 9.8551676614199186E-003 +Relative difference = 3.435558690007174e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.398632e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.399059e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.399059e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.555535 sec -INFO: No Floating Point Exceptions have been reported - 4,569,755,461 cycles # 2.931 GHz - 13,800,747,699 instructions # 3.02 insn per cycle - 1.559859354 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96651) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.877298e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.877727e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.877727e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 +TOTAL : 1.085679 sec +INFO: No Floating Point Exceptions have been reported + 3,755,580,707 cycles:u # 3.451 GHz (75.08%) + 515,828 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) + 341,735,253 stalled-cycles-backend:u # 9.10% backend cycles idle (75.01%) + 13,749,402,021 instructions:u # 3.66 insn per cycle + # 0.02 stalled cycles per insn (75.01%) + 1.092576997 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95973) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896065809E-003 -Relative difference = 3.151856596628469e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855164e-03 +Avg ME (F77/C++) = 9.8551639361110794E-003 +Relative difference = 6.48278610035626e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.803652e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.805665e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.805665e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.778546 sec -INFO: No Floating Point Exceptions have been reported - 2,147,523,686 cycles # 2.745 GHz - 4,840,927,245 instructions # 2.25 insn per cycle - 0.782889882 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85884) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.020454e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.020611e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.020611e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 +TOTAL : 0.519654 sec +INFO: No Floating Point Exceptions have been reported + 1,798,676,182 cycles:u # 3.444 GHz (74.57%) + 672,805 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.30%) + 155,964,300 stalled-cycles-backend:u # 8.67% backend cycles idle (75.50%) + 4,810,457,285 instructions:u # 2.67 insn per cycle + # 0.03 stalled cycles per insn (75.50%) + 0.526441967 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84309) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091923E-003 -Relative difference = 1.85880227405429e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.836478e-03 +Avg ME (F77/C++) = 9.8364784946823516E-003 +Relative difference = 5.0290597139820844e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.693768e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.696106e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.696106e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.688038 sec -INFO: No Floating Point Exceptions have been reported - 1,894,736,849 cycles # 2.739 GHz - 4,295,025,191 instructions # 2.27 insn per cycle - 0.692237484 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81725) (512y: 25) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091923E-003 -Relative difference = 1.85880227405429e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.859865e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.862153e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.862153e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.772052 sec -INFO: No Floating Point Exceptions have been reported - 1,359,092,301 cycles # 1.753 GHz - 2,169,957,409 instructions # 1.60 insn per cycle - 0.776490041 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4092) (512y: 32) (512z:79551) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982957326E-003 -Relative difference = 2.0044082998332894e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index e02407d644..cfcc794bdd 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-18_12:25:53 +DATE: 2024-09-18_17:20:43 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.664550e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.665186e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.665405e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.193141 sec -INFO: No Floating Point Exceptions have been reported - 7,365,717,542 cycles # 2.923 GHz - 16,291,118,073 instructions # 2.21 insn per cycle - 2.576836591 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.102923e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.103231e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.103265e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.419785 sec -INFO: No Floating Point Exceptions have been reported - 10,963,927,138 cycles # 2.923 GHz - 24,861,261,596 instructions # 2.27 insn per cycle - 3.806537159 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656498E-003 -Relative difference = 3.1385249252060663e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.500673e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.500867e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.500867e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 7.041188 sec -INFO: No Floating Point Exceptions have been reported - 19,221,485,171 cycles # 2.729 GHz - 54,134,690,618 instructions # 2.82 insn per cycle - 7.045507456 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32000) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.186093e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.186131e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.186131e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.453659 sec +INFO: No Floating Point Exceptions have been reported + 15,438,755,787 cycles:u # 3.465 GHz (74.98%) + 2,696,401 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.05%) + 1,605,984,207 stalled-cycles-backend:u # 10.40% backend cycles idle (75.05%) + 53,741,110,532 instructions:u # 3.48 insn per cycle + # 0.03 stalled cycles per insn (75.05%) + 4.464196839 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44590) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.537074e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.537163e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.537163e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.437220 sec -INFO: No Floating Point Exceptions have been reported - 9,396,080,919 cycles # 2.731 GHz - 26,188,082,836 instructions # 2.79 insn per cycle - 3.441517756 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96049) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.456635e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.456794e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.456794e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.152502 sec +INFO: No Floating Point Exceptions have been reported + 7,422,082,381 cycles:u # 3.442 GHz (74.98%) + 1,988,064 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.94%) + 816,635,761 stalled-cycles-backend:u # 11.00% backend cycles idle (74.81%) + 25,866,311,024 instructions:u # 3.49 insn per cycle + # 0.03 stalled cycles per insn (74.88%) + 2.207965851 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95377) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.541134e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.541635e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.541635e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.493238 sec -INFO: No Floating Point Exceptions have been reported - 4,077,957,635 cycles # 2.724 GHz - 9,249,641,886 instructions # 2.27 insn per cycle - 1.497708781 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.450735e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.451251e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.451251e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.970672 sec +INFO: No Floating Point Exceptions have been reported + 3,362,824,915 cycles:u # 3.455 GHz (74.57%) + 927,977 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.48%) + 306,236,549 stalled-cycles-backend:u # 9.11% backend cycles idle (74.89%) + 9,089,995,529 instructions:u # 2.70 insn per cycle + # 0.03 stalled cycles per insn (75.35%) + 0.977500128 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:82824) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.136665e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.137271e+02 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.137271e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.279311 sec -INFO: No Floating Point Exceptions have been reported - 3,517,339,720 cycles # 2.742 GHz - 8,183,228,052 instructions # 2.33 insn per cycle - 1.283633317 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80015) (512y: 80) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.501058e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.501647e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.501647e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.511075 sec -INFO: No Floating Point Exceptions have been reported - 2,666,286,599 cycles # 1.760 GHz - 4,173,044,119 instructions # 1.57 insn per cycle - 1.515586960 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2615) (512y: 92) (512z:78910) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 59afbf5683..73a4cbfb60 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-09-18_12:27:20 +DATE: 2024-09-18_17:21:20 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.673618e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.674137e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.674360e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.187725 sec -INFO: No Floating Point Exceptions have been reported - 7,320,649,548 cycles # 2.912 GHz - 16,262,382,237 instructions # 2.22 insn per cycle - 2.571114049 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.105826e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.106139e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.106173e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.426824 sec -INFO: No Floating Point Exceptions have been reported - 11,001,686,020 cycles # 2.930 GHz - 25,147,468,300 instructions # 2.29 insn per cycle - 3.812692076 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656498E-003 -Relative difference = 3.1385249252060663e-07 -OK (relative difference <= 5E-3) +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.043178e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.043403e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.043403e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.566514 sec -INFO: No Floating Point Exceptions have been reported - 19,176,347,779 cycles # 2.919 GHz - 54,156,968,111 instructions # 2.82 insn per cycle - 6.570813145 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32202) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.175682e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.175719e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.175719e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.492373 sec +INFO: No Floating Point Exceptions have been reported + 15,577,763,550 cycles:u # 3.466 GHz (74.92%) + 4,687,227 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.91%) + 1,702,139,874 stalled-cycles-backend:u # 10.93% backend cycles idle (74.93%) + 53,780,833,557 instructions:u # 3.45 insn per cycle + # 0.03 stalled cycles per insn (75.02%) + 4.498883454 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44515) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.555217e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.555303e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.555303e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.398105 sec -INFO: No Floating Point Exceptions have been reported - 9,273,027,189 cycles # 2.726 GHz - 26,087,136,722 instructions # 2.81 insn per cycle - 3.402445291 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95935) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.466705e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.466857e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.466857e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.141419 sec +INFO: No Floating Point Exceptions have been reported + 7,415,111,989 cycles:u # 3.459 GHz (75.01%) + 14,491,429 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.00%) + 753,324,685 stalled-cycles-backend:u # 10.16% backend cycles idle (75.00%) + 25,742,576,081 instructions:u # 3.47 insn per cycle + # 0.03 stalled cycles per insn (75.00%) + 2.147961105 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95039) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.537227e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.537679e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.537679e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.494127 sec -INFO: No Floating Point Exceptions have been reported - 4,071,118,335 cycles # 2.719 GHz - 9,214,803,224 instructions # 2.26 insn per cycle - 1.498443184 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83864) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.461169e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.461707e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.461707e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.968763 sec +INFO: No Floating Point Exceptions have been reported + 3,353,428,196 cycles:u # 3.452 GHz (74.49%) + 1,338,534 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.72%) + 289,176,814 stalled-cycles-backend:u # 8.62% backend cycles idle (75.13%) + 9,029,253,470 instructions:u # 2.69 insn per cycle + # 0.03 stalled cycles per insn (75.30%) + 0.975515131 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:82125) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.138433e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.139090e+02 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.139090e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.277732 sec -INFO: No Floating Point Exceptions have been reported - 3,507,535,748 cycles # 2.738 GHz - 8,168,319,774 instructions # 2.33 insn per cycle - 1.282049677 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79421) (512y: 230) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.543576e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.544114e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.544114e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.493048 sec -INFO: No Floating Point Exceptions have been reported - 2,621,670,941 cycles # 1.752 GHz - 4,167,760,475 instructions # 1.59 insn per cycle - 1.497511330 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1879) (512y: 174) (512z:78884) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index b0413f07b6..56928fe017 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,258 +1,53 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-18_17:18:08 -DATE: 2024-09-18_12:19:33 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.879954e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.891707e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.001488e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.459044 sec -INFO: No Floating Point Exceptions have been reported - 1,939,663,698 cycles # 2.864 GHz - 2,747,739,655 instructions # 1.42 insn per cycle - 0.734387710 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault + 750,810,383 cycles:u # 2.201 GHz (73.28%) + 2,685,589 stalled-cycles-frontend:u # 0.36% frontend cycles idle (68.25%) + 6,438,177 stalled-cycles-backend:u # 0.86% backend cycles idle (72.32%) + 1,309,783,720 instructions:u # 1.74 insn per cycle + # 0.00 stalled cycles per insn (78.01%) + 0.392567056 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.061438e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.512391e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.741979e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.541789 sec -INFO: No Floating Point Exceptions have been reported - 2,258,330,350 cycles # 2.885 GHz - 3,233,524,764 instructions # 1.43 insn per cycle - 0.842114758 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.056900e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.079897e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.079897e+05 ) sec^-1 -MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.570278 sec -INFO: No Floating Point Exceptions have been reported - 4,626,289,546 cycles # 2.939 GHz - 13,191,201,959 instructions # 2.85 insn per cycle - 1.574568894 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault + 945,127,956 cycles:u # 2.104 GHz (74.53%) + 2,369,108 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.38%) + 6,385,127 stalled-cycles-backend:u # 0.68% backend cycles idle (74.64%) + 1,559,615,497 instructions:u # 1.65 insn per cycle + # 0.00 stalled cycles per insn (73.60%) + 0.487906308 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499481 -Relative difference = 5.286896511435107e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.877819e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.949205e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.949205e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.891875 sec -INFO: No Floating Point Exceptions have been reported - 2,638,327,743 cycles # 2.947 GHz - 7,555,209,951 instructions # 2.86 insn per cycle - 0.896114078 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499475 -Relative difference = 5.286896515331313e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.170773e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.377039e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.377039e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.536262 sec -INFO: No Floating Point Exceptions have been reported - 1,489,383,659 cycles # 2.759 GHz - 3,159,296,473 instructions # 2.12 insn per cycle - 0.540558254 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.529419e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.784986e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.784986e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.483576 sec -INFO: No Floating Point Exceptions have been reported - 1,345,705,641 cycles # 2.762 GHz - 3,013,816,668 instructions # 2.24 insn per cycle - 0.487835073 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2749) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.357874e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.470306e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.470306e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.716743 sec -INFO: No Floating Point Exceptions have been reported - 1,329,087,485 cycles # 1.845 GHz - 1,962,911,490 instructions # 1.48 insn per cycle - 0.721045759 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +ERROR! 
C++ calculation (C++/GPU) failed diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index e338aa0c83..3045dd42da 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,282 +1,61 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-18_18:07:10 -DATE: 2024-09-18_12:54:34 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.300988e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.591479e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.591479e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.483517 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,018,637,546 cycles # 2.889 GHz - 3,002,221,313 instructions # 1.49 insn per cycle - 0.755433693 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault + 788,352,573 cycles:u # 2.294 GHz (73.24%) + 3,031,600 stalled-cycles-frontend:u # 0.38% frontend cycles idle (69.20%) + 34,280,145 stalled-cycles-backend:u # 4.35% backend cycles idle (72.41%) + 1,272,941,365 instructions:u # 1.61 insn per cycle + # 0.03 stalled cycles per insn (77.37%) + 0.376352362 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.209513e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.250583e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.250583e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.757854 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,924,491,267 cycles # 2.893 GHz - 4,472,331,439 instructions # 1.53 insn per cycle - 1.067931667 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.054217e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.077389e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.077389e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.581825 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,664,515,506 cycles # 2.942 GHz - 13,198,020,525 instructions # 2.83 insn per cycle - 1.586342613 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499481 -Relative difference = 5.286896511435107e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.861182e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.934526e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.934526e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.908066 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,683,422,373 cycles # 2.942 GHz - 7,604,693,273 instructions # 2.83 insn per cycle - 0.912668086 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499475 -Relative difference = 5.286896515331313e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.136463e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.344918e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.344918e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.550693 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,532,887,808 cycles # 2.763 GHz - 3,210,306,872 instructions # 2.09 insn per cycle - 0.555384102 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.483226e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.741231e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.741231e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.498787 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,390,412,454 cycles # 2.766 GHz - 3,064,189,434 instructions # 2.20 insn per cycle - 0.503409402 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2749) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.324425e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.438930e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.438930e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.734309 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,369,927,300 cycles # 1.856 GHz - 2,000,629,444 instructions # 1.46 insn per cycle - 0.738870915 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault + 2,987,537,745 cycles:u # 2.763 GHz (75.82%) + 16,562,938 stalled-cycles-frontend:u # 0.55% frontend cycles idle (75.58%) + 817,992,169 stalled-cycles-backend:u # 27.38% backend cycles idle (75.59%) + 3,167,893,164 instructions:u # 1.06 insn per cycle + # 0.26 stalled cycles per insn (75.61%) + 1.119291263 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +ERROR! 
C++ calculation (C++/GPU) failed diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 698af75849..0a3aafbd7f 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,258 +1,53 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-18_17:18:10 -DATE: 2024-09-18_12:19:47 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.870113e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.853917e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.966787e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.459366 sec -INFO: No Floating Point Exceptions have been reported - 1,939,416,729 cycles # 2.875 GHz - 2,719,660,225 instructions # 1.40 insn per cycle - 0.733418344 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault + 811,304,021 cycles:u # 2.416 GHz (70.69%) + 2,521,242 
stalled-cycles-frontend:u # 0.31% frontend cycles idle (72.81%) + 6,643,430 stalled-cycles-backend:u # 0.82% backend cycles idle (76.33%) + 1,326,175,431 instructions:u # 1.63 insn per cycle + # 0.01 stalled cycles per insn (76.30%) + 0.374116903 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.000378e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.373625e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.579737e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.544100 sec -INFO: No Floating Point Exceptions have been reported - 2,268,193,870 cycles # 2.866 GHz - 3,228,698,159 instructions # 1.42 insn per cycle - 0.849845463 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.061980e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.085030e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.085030e+05 ) sec^-1 -MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.562513 sec -INFO: No Floating Point Exceptions have been reported - 4,622,072,256 cycles # 2.951 GHz - 13,179,636,938 instructions # 2.85 insn per cycle - 1.566824554 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 692) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault + 951,602,277 cycles:u # 2.125 GHz (75.16%) + 2,406,140 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.11%) + 6,352,933 stalled-cycles-backend:u # 0.67% backend cycles idle (75.69%) + 1,523,514,711 instructions:u # 1.60 insn per cycle + # 0.00 stalled cycles per insn (74.26%) + 0.488589383 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499481 -Relative difference = 5.286896511435107e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.876350e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.948368e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.948368e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.892202 sec -INFO: No Floating Point Exceptions have been reported - 2,639,628,239 cycles # 2.947 GHz - 7,552,826,806 instructions # 2.86 insn per cycle - 0.896585147 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467499475 -Relative difference = 5.286896515331313e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.183448e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.393646e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.393646e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.534141 sec -INFO: No Floating Point Exceptions have been reported - 1,491,163,611 cycles # 2.773 GHz - 3,158,625,928 instructions # 2.12 insn per cycle - 0.538404277 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2976) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.492000e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.744364e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.744364e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.489086 sec -INFO: No Floating Point Exceptions have been reported - 1,346,762,343 cycles # 2.733 GHz - 3,011,186,186 instructions # 2.24 insn per cycle - 0.493386881 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2726) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.331076e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.442354e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.442354e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.724419 sec -INFO: No Floating Point Exceptions have been reported - 1,327,007,586 cycles # 1.823 GHz - 1,960,723,409 instructions # 1.48 insn per cycle - 0.728619129 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1356) (512y: 106) (512z: 2218) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +ERROR! 
C++ calculation (C++/GPU) failed diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 8a6bb74f5e..e1363c40e9 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,258 +1,53 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-18_17:18:13 -DATE: 2024-09-18_12:20:00 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.830452e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.999598e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.147092e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.454704 sec -INFO: No Floating Point Exceptions have been reported - 1,916,653,758 cycles # 2.859 GHz - 2,706,744,679 instructions # 1.41 insn per cycle - 0.728210028 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault + 764,555,819 cycles:u # 2.292 GHz (74.60%) + 2,714,289 stalled-cycles-frontend:u 
# 0.36% frontend cycles idle (71.36%) + 6,306,367 stalled-cycles-backend:u # 0.82% backend cycles idle (72.97%) + 1,326,786,226 instructions:u # 1.74 insn per cycle + # 0.00 stalled cycles per insn (75.59%) + 0.376846597 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.474236e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.587297e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.949656e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 -TOTAL : 0.490292 sec -INFO: No Floating Point Exceptions have been reported - 2,078,449,093 cycles # 2.886 GHz - 2,974,210,572 instructions # 1.43 insn per cycle - 0.777065481 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247487904286338 -Relative difference = 0.0003670698531228044 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.100218e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.125308e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.125308e+05 ) sec^-1 -MeanMatrixElemValue = ( 
2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.507385 sec -INFO: No Floating Point Exceptions have been reported - 4,410,101,975 cycles # 2.919 GHz - 12,953,085,822 instructions # 2.94 insn per cycle - 1.511568329 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault + 904,818,769 cycles:u # 2.159 GHz (75.09%) + 2,536,848 stalled-cycles-frontend:u # 0.28% frontend cycles idle (70.53%) + 6,504,319 stalled-cycles-backend:u # 0.72% backend cycles idle (71.17%) + 1,450,130,950 instructions:u # 1.60 insn per cycle + # 0.00 stalled cycles per insn (75.89%) + 0.458219922 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246861273719524 -Relative difference = 8.940352641194861e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.885848e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.067058e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.067058e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.584913 sec -INFO: No Floating Point Exceptions have been reported - 1,727,797,245 cycles # 2.936 GHz - 4,541,987,860 instructions # 2.63 insn per cycle - 0.589023498 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246862329122401 -Relative difference = 1.6348320966878032e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.703055e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.396540e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.396540e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.305122 sec -INFO: No Floating Point Exceptions have been reported - 856,571,449 cycles # 2.776 GHz - 1,917,826,981 instructions # 2.24 insn per cycle - 0.309207440 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.972249e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.763699e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.763699e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.292353 sec -INFO: No Floating Point Exceptions have been reported - 806,013,891 cycles # 2.724 GHz - 1,834,284,908 instructions # 2.28 insn per cycle - 0.296525539 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3400) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.507099e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.952644e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.952644e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.383525 sec -INFO: No Floating Point Exceptions have been reported - 728,616,899 cycles # 1.883 GHz - 1,308,760,783 instructions # 1.80 insn per cycle - 0.387733440 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 24) (512z: 2435) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491576758442 -Relative difference = 1.1066920862943416e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +ERROR! 
C++ calculation (C++/GPU) failed diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index a6b985fae9..c373b3f7b1 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,282 +1,61 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-18_18:07:13 -DATE: 2024-09-18_12:54:48 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.927791e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.333519e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.333519e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.017654e+01 +- 1.429183e+01 ) GeV^-2 -TOTAL : 0.469351 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,981,672,619 cycles # 2.860 GHz - 2,838,745,918 instructions # 1.43 insn per cycle - 0.751805905 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault + 829,790,807 cycles:u # 2.462 GHz (71.01%) + 2,840,574 stalled-cycles-frontend:u # 0.34% frontend cycles idle (73.22%) + 25,476,620 stalled-cycles-backend:u # 3.07% backend cycles idle (75.10%) + 1,327,045,699 instructions:u # 1.60 insn per cycle + # 0.02 stalled cycles per insn (74.56%) + 0.445531606 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.989037e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.963677e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.963677e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.609941e+02 +- 2.115589e+02 ) GeV^-2 -TOTAL : 0.635748 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,515,384,416 cycles # 2.885 GHz - 3,805,896,684 instructions # 1.51 insn per cycle - 0.928757267 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247487904286338 -Relative difference = 0.0003670698531228044 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.110811e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.136583e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.136583e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.496763 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,424,063,947 cycles # 2.949 GHz - 12,956,460,167 instructions # 2.93 insn per cycle - 1.501035221 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246861273719524 -Relative difference = 8.940352641194861e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.857646e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.036818e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.036818e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.595633 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,753,185,847 cycles # 2.926 GHz - 4,590,460,046 instructions # 2.62 insn per cycle - 0.599868062 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246862329122401 -Relative difference = 1.6348320966878032e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.498095e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.167392e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.167392e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.320525 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 879,877,577 cycles # 2.713 GHz - 1,955,191,246 instructions # 2.22 insn per cycle - 0.324936571 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.017893e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.823832e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.823832e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.294540 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 824,659,177 cycles # 2.764 GHz - 1,871,065,231 instructions # 2.27 insn per cycle - 0.298923642 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3400) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.488254e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.923976e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.923976e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.389655 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 750,952,234 cycles # 1.909 GHz - 1,350,104,124 instructions # 1.80 insn per cycle - 0.394048329 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 24) (512z: 2435) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault + 2,962,636,635 cycles:u # 2.883 GHz (74.00%) + 17,010,740 stalled-cycles-frontend:u # 0.57% frontend cycles idle (73.54%) + 830,632,643 stalled-cycles-backend:u # 28.04% backend cycles idle (74.38%) + 3,207,613,891 instructions:u # 1.08 insn per cycle + # 0.26 stalled cycles per insn (75.89%) + 1.062476529 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491576758442 -Relative difference = 1.1066920862943416e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +ERROR! 
C++ calculation (C++/GPU) failed diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index 67763acaac..6daa4befac 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,258 +1,53 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-18_17:18:16 -DATE: 2024-09-18_12:20:13 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.836197e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.010594e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.149037e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.455895 sec -INFO: No Floating Point Exceptions have been reported - 1,937,517,385 cycles # 2.882 GHz - 2,695,733,072 instructions # 1.39 insn per cycle - 0.731352438 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault + 755,655,034 cycles:u # 2.280 GHz (75.03%) + 2,296,361 stalled-cycles-frontend:u 
# 0.30% frontend cycles idle (74.95%) + 4,893,982 stalled-cycles-backend:u # 0.65% backend cycles idle (75.09%) + 1,244,581,285 instructions:u # 1.65 insn per cycle + # 0.00 stalled cycles per insn (76.03%) + 0.373528752 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.416288e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.368760e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.717095e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 -TOTAL : 0.494521 sec -INFO: No Floating Point Exceptions have been reported - 2,101,577,521 cycles # 2.872 GHz - 2,967,805,317 instructions # 1.41 insn per cycle - 0.790688389 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247487904286338 -Relative difference = 0.0003670698531228044 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.109320e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.134422e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.134422e+05 ) sec^-1 -MeanMatrixElemValue = ( 
2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.495018 sec -INFO: No Floating Point Exceptions have been reported - 4,406,318,830 cycles # 2.941 GHz - 12,927,562,871 instructions # 2.93 insn per cycle - 1.499121241 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 630) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault + 916,714,792 cycles:u # 2.232 GHz (74.77%) + 2,420,584 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.50%) + 9,755,195 stalled-cycles-backend:u # 1.06% backend cycles idle (71.57%) + 1,510,333,163 instructions:u # 1.65 insn per cycle + # 0.01 stalled cycles per insn (73.48%) + 0.464608691 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246861273719524 -Relative difference = 8.940352641194861e-08 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.896108e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.081157e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.081157e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.582933 sec -INFO: No Floating Point Exceptions have been reported - 1,729,684,566 cycles # 2.949 GHz - 4,536,959,704 instructions # 2.62 insn per cycle - 0.587227353 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3611) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246862329122401 -Relative difference = 1.6348320966878032e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.671417e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 6.388788e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.388788e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.306400 sec -INFO: No Floating Point Exceptions have been reported - 861,419,707 cycles # 2.779 GHz - 1,914,521,871 instructions # 2.22 insn per cycle - 0.310539350 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3549) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.063623e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.871376e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.871376e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.288149 sec -INFO: No Floating Point Exceptions have been reported - 805,096,427 cycles # 2.760 GHz - 1,830,123,182 instructions # 2.27 insn per cycle - 0.292238886 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.516684e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.964575e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.964575e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.382571 sec -INFO: No Floating Point Exceptions have been reported - 732,988,918 cycles # 1.898 GHz - 1,306,469,020 instructions # 1.78 insn per cycle - 0.386957442 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1928) (512y: 24) (512z: 2435) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491576758442 -Relative difference = 1.1066920862943416e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +ERROR! 
C++ calculation (C++/GPU) failed diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 91e0f5565c..b27e665ecc 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,258 +1,53 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-18_17:18:18 -DATE: 2024-09-18_12:20:25 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.873875e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.862522e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.986643e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.460578 sec -INFO: No Floating Point Exceptions have been reported - 1,947,256,638 cycles # 2.867 GHz - 2,707,543,109 instructions # 1.39 insn per cycle - 0.736559227 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault + 745,365,703 cycles:u # 2.195 GHz (75.59%) + 2,274,468 stalled-cycles-frontend:u 
# 0.31% frontend cycles idle (77.60%) + 11,316,455 stalled-cycles-backend:u # 1.52% backend cycles idle (76.30%) + 1,331,170,438 instructions:u # 1.79 insn per cycle + # 0.01 stalled cycles per insn (74.12%) + 0.376623083 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.023480e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.410580e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.620297e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.541253 sec -INFO: No Floating Point Exceptions have been reported - 2,242,175,675 cycles # 2.877 GHz - 3,202,804,008 instructions # 1.43 insn per cycle - 0.837177537 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482577104625 -Relative difference = 5.209967070245855e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.056357e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.078870e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.078870e+05 ) sec^-1 -MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.570709 sec -INFO: No Floating Point Exceptions have been reported - 4,639,217,468 cycles # 2.947 GHz - 13,177,906,216 instructions # 2.84 insn per cycle - 1.574828509 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 681) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault + 920,133,495 cycles:u # 2.031 GHz (76.22%) + 2,391,120 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.30%) + 8,198,586 stalled-cycles-backend:u # 0.89% backend cycles idle (73.56%) + 1,597,948,911 instructions:u # 1.74 insn per cycle + # 0.01 stalled cycles per insn (71.62%) + 0.492743549 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482734618697 -Relative difference = 5.099411406595165e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.872603e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.943230e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.943230e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.894053 sec -INFO: No Floating Point Exceptions have been reported - 2,648,821,910 cycles # 2.951 GHz - 7,473,297,472 instructions # 2.82 insn per cycle - 0.898331919 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3152) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482734618697 -Relative difference = 5.099411406595165e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.194377e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.403567e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.403567e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.532589 sec -INFO: No Floating Point Exceptions have been reported - 1,476,927,402 cycles # 2.754 GHz - 3,127,083,010 instructions # 2.12 insn per cycle - 0.536841632 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3133) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.590247e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.853965e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.853965e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.476058 sec -INFO: No Floating Point Exceptions have been reported - 1,323,043,261 cycles # 2.758 GHz - 2,981,146,980 instructions # 2.25 insn per cycle - 0.480339840 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 110) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.287752e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.394431e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.394431e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.737861 sec -INFO: No Floating Point Exceptions have been reported - 1,365,080,339 cycles # 1.841 GHz - 1,989,993,648 instructions # 1.46 insn per cycle - 0.742169497 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1679) (512y: 108) (512z: 2251) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +ERROR! 
C++ calculation (C++/GPU) failed diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index bc8dd367d2..362b389de7 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,258 +1,53 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-09-18_17:18:21 -DATE: 2024-09-18_12:20:39 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.866883e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.866305e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.974692e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.459058 sec -INFO: No Floating Point Exceptions have been reported - 1,947,107,746 cycles # 2.878 GHz - 2,728,462,242 instructions # 1.40 insn per cycle - 0.734017841 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault + 754,182,859 cycles:u # 2.249 GHz (76.50%) + 2,383,388 stalled-cycles-frontend:u 
# 0.32% frontend cycles idle (75.24%) + 4,798,872 stalled-cycles-backend:u # 0.64% backend cycles idle (76.25%) + 1,233,149,890 instructions:u # 1.64 insn per cycle + # 0.00 stalled cycles per insn (76.10%) + 0.378348359 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.016948e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.370809e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.575747e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.539080 sec -INFO: No Floating Point Exceptions have been reported - 2,244,664,779 cycles # 2.884 GHz - 3,243,168,469 instructions # 1.44 insn per cycle - 0.835323761 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482577104625 -Relative difference = 5.209967070245855e-07 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.054078e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.076959e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.076959e+05 ) sec^-1 -MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.574290 sec -INFO: No Floating Point Exceptions have been reported - 4,646,036,617 cycles # 2.945 GHz - 13,166,645,489 instructions # 2.83 insn per cycle - 1.578550564 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault + 1,015,084,795 cycles:u # 2.075 GHz (75.59%) + 2,599,616 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.55%) + 8,447,022 stalled-cycles-backend:u # 0.83% backend cycles idle (74.81%) + 1,501,488,691 instructions:u # 1.48 insn per cycle + # 0.01 stalled cycles per insn (73.59%) + 0.527880309 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482734618697 -Relative difference = 5.099411406595165e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.873438e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.944671e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.944671e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.893595 sec -INFO: No Floating Point Exceptions have been reported - 2,639,674,089 cycles # 2.942 GHz - 7,474,954,292 instructions # 2.83 insn per cycle - 0.897961439 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3141) (avx2: 0) (512y: 0) (512z: 0) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482734618697 -Relative difference = 5.099411406595165e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.194979e+05 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.406933e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.406933e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.532401 sec -INFO: No Floating Point Exceptions have been reported - 1,471,043,256 cycles # 2.744 GHz - 3,127,494,333 instructions # 2.13 insn per cycle - 0.536715670 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3111) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.604804e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.871054e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.871054e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.474052 sec -INFO: No Floating Point Exceptions have been reported - 1,321,700,799 cycles # 2.767 GHz - 2,981,907,836 instructions # 2.26 insn per cycle - 0.478334854 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2871) (512y: 110) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.246259e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.348752e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.348752e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.751218 sec -INFO: No Floating Point Exceptions have been reported - 1,373,432,632 cycles # 1.819 GHz - 1,989,927,175 instructions # 1.45 insn per cycle - 0.755614240 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1655) (512y: 108) (512z: 2251) 
-------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) -========================================================================= - -TEST COMPLETED +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +ERROR! 
C++ calculation (C++/GPU) failed diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 6ae2d07b8c..95fe9024c8 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-09-18_19:28:53 -DATE: 2024-09-18_13:35:28 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.333836e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.844165e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.406248e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.534496 sec -INFO: No Floating Point Exceptions have been reported - 2,180,885,043 cycles # 2.827 GHz - 3,135,152,783 instructions # 1.44 insn per cycle - 0.828766444 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.710846e+07 ) sec^-1 +EvtsPerSec[MatrixElems] 
(3) = ( 2.086431e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.101389e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 +TOTAL : 0.566710 sec +INFO: No Floating Point Exceptions have been reported + 984,481,609 cycles:u # 2.095 GHz (74.23%) + 2,500,193 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.70%) + 6,402,428 stalled-cycles-backend:u # 0.65% backend cycles idle (76.35%) + 1,516,815,030 instructions:u # 1.54 insn per cycle + # 0.00 stalled cycles per insn (76.44%) + 0.633868007 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110280 -Relative difference = 2.1036162329561614e-07 +Avg ME (F77/GPU) = 4.3134710926110271 +Relative difference = 2.1036162350152416e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.605100e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.641462e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.641462e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.643809 sec -INFO: No Floating Point Exceptions have been reported - 19,303,523,142 cycles # 2.904 GHz - 51,922,542,271 instructions # 2.69 insn per cycle - 6.649309354 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.021746e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.065340e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.065340e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 5.401735 sec +INFO: No Floating Point Exceptions have been reported + 16,414,406,731 cycles:u # 3.030 GHz (74.87%) + 9,369,069 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.96%) + 1,982,352,272 stalled-cycles-backend:u # 12.08% backend cycles idle (75.03%) + 51,670,056,906 instructions:u # 3.15 insn per cycle + # 0.04 stalled cycles per insn (75.05%) + 5.421856685 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 746) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.864838e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.993187e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.993187e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.771971 sec -INFO: No Floating Point Exceptions have been reported - 10,899,823,947 cycles # 2.886 GHz - 30,797,169,430 instructions # 2.83 insn per cycle - 3.777469678 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2915) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.443699e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.581147e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.581147e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 3.257976 sec +INFO: No Floating Point Exceptions have been reported + 9,768,721,970 cycles:u # 2.984 GHz (74.89%) + 8,814,909 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.00%) + 3,067,265,282 stalled-cycles-backend:u # 31.40% backend cycles idle (75.08%) + 30,643,212,993 instructions:u # 3.14 insn per cycle + # 0.10 stalled cycles per insn (75.08%) + 3.278029410 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2833) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.618832e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.953390e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.953390e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.382134 sec -INFO: No Floating Point Exceptions have been reported - 6,463,553,394 cycles # 2.708 GHz - 13,666,010,364 instructions # 2.11 insn per cycle - 2.387555326 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2941) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.181020e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.606420e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.606420e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.908782 sec +INFO: No Floating Point Exceptions have been reported + 5,603,812,106 cycles:u # 2.912 GHz (74.85%) + 8,977,539 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.03%) + 1,293,319,013 stalled-cycles-backend:u # 23.08% backend cycles idle (75.06%) + 13,411,282,597 instructions:u # 2.39 insn per cycle + # 0.10 stalled cycles per insn (75.07%) + 1.929025243 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2817) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.007992e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.398956e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.398956e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
2.205089 sec -INFO: No Floating Point Exceptions have been reported - 5,947,846,964 cycles # 2.692 GHz - 13,006,222,979 instructions # 2.19 insn per cycle - 2.210472243 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2667) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.325208e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.493799e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.493799e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.265150 sec -INFO: No Floating Point Exceptions have been reported - 5,846,999,066 cycles # 1.789 GHz - 8,588,678,582 instructions # 1.47 insn per cycle - 3.271052301 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1506) (512y: 128) (512z: 1946) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index a09eaeb7bd..29ed63a3ea 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-09-18_19:29:07 -DATE: 2024-09-18_13:35:55 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.270085e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.841839e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.403601e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.533386 sec -INFO: No Floating Point Exceptions have been reported - 2,214,034,172 cycles # 2.879 GHz - 3,142,399,923 instructions # 1.42 insn per cycle - 0.826419344 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 216 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.726607e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.134440e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.150044e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 +TOTAL : 0.438497 sec +INFO: No Floating Point Exceptions have been reported + 972,883,385 cycles:u # 2.087 GHz (76.20%) + 2,368,661 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.99%) + 11,494,612 stalled-cycles-backend:u # 1.18% backend cycles idle (75.99%) + 1,553,739,793 instructions:u # 1.60 insn per cycle + # 0.01 stalled cycles per insn (75.14%) + 0.614122810 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110280 -Relative difference = 2.1036162329561614e-07 +Avg ME (F77/GPU) = 4.3134710926110271 +Relative difference = 2.1036162350152416e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.706120e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.746757e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.746757e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.255979 sec -INFO: No Floating Point Exceptions have been reported - 18,389,967,178 cycles # 2.937 GHz - 50,052,771,539 instructions # 2.72 insn per cycle - 6.261520945 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 626) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.159672e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.210145e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.210145e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 5.067234 sec +INFO: No Floating Point Exceptions have been reported + 15,404,137,486 cycles:u # 3.031 GHz (74.98%) + 10,569,748 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.99%) + 20,185,422 stalled-cycles-backend:u # 0.13% backend cycles idle (74.99%) + 49,937,587,239 instructions:u # 3.24 insn per cycle + # 0.00 stalled cycles per insn (74.98%) + 5.087532895 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.086242e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.232589e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.232589e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.507867 sec -INFO: No Floating Point Exceptions have been reported - 10,373,977,217 cycles # 2.954 GHz - 29,174,589,795 instructions # 2.81 insn per cycle - 3.513510894 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2733) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.573808e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.720491e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.720491e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 3.145304 sec +INFO: No Floating Point Exceptions have been reported + 9,399,582,430 cycles:u # 2.974 GHz (74.96%) + 10,449,980 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.97%) + 2,379,246,017 stalled-cycles-backend:u # 25.31% backend cycles idle (74.94%) + 29,294,986,128 instructions:u # 3.12 insn per cycle + # 0.08 stalled cycles per insn (74.94%) + 3.196349391 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2625) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.355224e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.644479e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.644479e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.518598 sec -INFO: No Floating Point Exceptions have been reported - 6,982,239,473 cycles # 2.767 GHz - 15,149,066,703 instructions # 2.17 insn per cycle - 2.524208385 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3020) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.345754e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.652728e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.652728e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.170411 sec +INFO: No Floating Point Exceptions have been reported + 6,455,524,959 cycles:u # 2.953 GHz (74.75%) + 9,364,879 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.94%) + 2,030,379,431 stalled-cycles-backend:u # 31.45% backend cycles idle (75.12%) + 15,173,978,080 instructions:u # 2.35 insn per cycle + # 0.13 stalled cycles per insn (75.12%) + 2.190787664 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3011) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.542431e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.862341e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.862341e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
2.419194 sec -INFO: No Floating Point Exceptions have been reported - 6,707,959,962 cycles # 2.767 GHz - 14,619,001,595 instructions # 2.18 insn per cycle - 2.424680502 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2621) (512y: 302) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.289276e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.449465e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.449465e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.297038 sec -INFO: No Floating Point Exceptions have been reported - 6,083,017,370 cycles # 1.843 GHz - 10,339,705,857 instructions # 1.70 insn per cycle - 3.302657897 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 214) (512z: 2129) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 50a3de8673..4971001236 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-09-18_19:29:22 -DATE: 2024-09-18_13:36:21 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.744477e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.525834e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.617286e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.492808 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.361472e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.486858e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.524263e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.834176e+00 +- 1.462500e-01 ) GeV^0 +TOTAL : 0.380721 sec INFO: No Floating Point Exceptions have been reported - 2,047,170,412 cycles # 2.828 GHz - 2,929,586,090 
instructions # 1.43 insn per cycle - 0.781904908 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 131 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 841,371,168 cycles:u # 2.087 GHz (73.47%) + 2,458,726 stalled-cycles-frontend:u # 0.29% frontend cycles idle (73.18%) + 8,058,496 stalled-cycles-backend:u # 0.96% backend cycles idle (74.42%) + 1,505,744,040 instructions:u # 1.79 insn per cycle + # 0.01 stalled cycles per insn (75.25%) + 0.440938607 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313490e+00 -Avg ME (F77/GPU) = 4.3136695491848513 -Relative difference = 4.162503792787837e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 4.313524e+00 +Avg ME (F77/GPU) = 4.3135525361867622 +Relative difference = 6.615515935930387e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.678996e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.720654e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.720654e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 6.333448 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,607,993,167 cycles # 2.936 GHz - 51,216,519,035 instructions # 2.75 insn per cycle - 6.339213853 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 625) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.247195e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.301171e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.301171e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 4.834772 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 14,778,482,520 cycles:u # 3.051 GHz (74.88%) + 17,012,751 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.91%) + 2,657,130,065 stalled-cycles-backend:u # 17.98% backend cycles idle (74.99%) + 51,520,305,643 instructions:u # 3.49 insn per cycle + # 0.05 stalled cycles per insn (75.07%) + 4.848724315 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 723) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,33 +86,36 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135738277342170 -Relative difference = 3.9935743068669333e-08 +Avg ME (F77/C++) = 4.3135737704578787 +Relative difference = 5.321390598852464e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.022786e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.287209e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.287209e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.694054 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,934,623,764 cycles # 2.940 GHz - 19,316,417,604 instructions # 2.43 insn per cycle - 2.699461082 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3542) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.124807e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.418890e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.418890e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 2.214214 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,715,567,089 cycles:u # 3.020 GHz (74.75%) + 11,820,134 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.84%) + 2,607,837,420 stalled-cycles-backend:u # 38.83% backend cycles idle (74.92%) + 18,638,250,240 instructions:u # 2.78 insn per cycle + # 0.14 stalled cycles per insn (75.11%) + 2.227937542 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3319) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,33 +123,36 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313572e+00 -Avg ME (F77/C++) = 4.3135722697479650 -Relative difference = 6.253470796314402e-08 +Avg ME (C++/C++) = 4.313573e+00 +Avg ME (F77/C++) = 4.3135733226081356 +Relative difference = 7.478907526568244e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.880495e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.877642e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.877642e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.418247 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,951,478,174 cycles # 2.777 GHz - 8,833,281,557 instructions # 2.24 insn per cycle - 1.423672827 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3715) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.953846e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.107187e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.107187e+06 ) sec^-1 +MeanMatrixElemValue = ( 7.289197e+00 +- 1.809101e-01 ) GeV^0 +TOTAL : 1.221386 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,581,474,569 cycles:u # 2.910 GHz (74.72%) + 7,444,068 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.69%) + 1,125,447,586 stalled-cycles-backend:u # 31.42% backend cycles idle (74.99%) + 8,605,995,544 instructions:u # 2.40 insn per cycle + # 0.13 stalled cycles per insn (75.31%) + 1.235285796 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3600) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,78 +160,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645242873579 -Relative difference = 1.1028294269894893e-07 +Avg ME (F77/C++) = 4.3135650658514351 +Relative difference = 1.526612799754012e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.368251e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.499225e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.499225e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.339980 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,727,978,138 cycles # 2.773 GHz - 8,431,050,226 instructions # 2.26 insn per cycle - 1.345489073 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3541) (512y: 20) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645242873579 -Relative difference = 1.1028294269894893e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.964882e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.513882e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.513882e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.846291 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,506,879,162 cycles # 1.895 GHz - 
6,243,949,016 instructions # 1.78 insn per cycle - 1.851728712 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2325) (512y: 22) (512z: 2290) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313564e+00 -Avg ME (F77/C++) = 4.3135643536224961 -Relative difference = 8.197919301304478e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index 
2b5536237c..329c69ad81 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-09-18_19:29:34 -DATE: 2024-09-18_13:36:43 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.958341e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.585012e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.687226e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.488902 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.516700e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.752203e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.792287e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.834176e+00 +- 1.462500e-01 ) GeV^0 +TOTAL : 0.379886 sec INFO: No Floating Point Exceptions have been reported - 2,054,269,206 cycles # 2.862 GHz - 2,934,748,812 instructions # 1.43 insn per cycle - 0.774105073 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 879,908,839 cycles:u # 2.184 GHz (73.86%) + 2,356,817 stalled-cycles-frontend:u # 0.27% frontend cycles idle (73.26%) + 7,801,515 stalled-cycles-backend:u # 0.89% backend cycles idle (73.22%) + 1,507,876,997 instructions:u # 1.71 insn per cycle + # 0.01 stalled cycles per insn (76.16%) + 0.441954457 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313490e+00 -Avg ME (F77/GPU) = 4.3136695491848513 -Relative difference = 4.162503792787837e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 4.313524e+00 +Avg ME (F77/GPU) = 4.3135525361867622 +Relative difference = 6.615515935930387e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.738704e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.782579e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.782579e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 6.118201 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,018,613,315 cycles # 2.943 GHz - 49,602,263,054 instructions # 2.75 insn per cycle - 6.123752242 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 613) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.413549e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.475820e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.475820e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 4.512885 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 13,834,776,140 cycles:u # 3.059 GHz (74.91%) + 17,954,574 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.92%) + 294,659,305 stalled-cycles-backend:u # 2.13% backend cycles idle (75.01%) + 49,452,636,042 instructions:u # 3.57 insn per cycle + # 0.01 stalled cycles per insn (75.06%) + 4.526797728 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 614) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,33 +86,36 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135738277342170 -Relative difference = 3.9935743068669333e-08 +Avg ME (F77/C++) = 4.3135737704578787 +Relative difference = 5.321390598852464e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.513439e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.846420e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.846420e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.410664 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,118,641,278 cycles # 2.947 GHz - 18,533,207,759 instructions # 2.60 insn per cycle - 2.416130283 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3252) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.029890e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.445427e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.445427e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 1.906181 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 5,696,213,504 cycles:u # 2.973 GHz (74.95%) + 11,922,190 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.95%) + 1,744,482,482 stalled-cycles-backend:u # 30.63% backend cycles idle (74.99%) + 18,249,984,554 instructions:u # 3.20 insn per cycle + # 0.10 stalled cycles per insn (74.99%) + 1.920527841 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3078) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,33 +123,36 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313572e+00 -Avg ME (F77/C++) = 4.3135722697479650 -Relative difference = 6.253470796314402e-08 +Avg ME (C++/C++) = 4.313573e+00 +Avg ME (F77/C++) = 4.3135733226081356 +Relative difference = 7.478907526568244e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.337179e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.778552e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.778552e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 2.052609 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,666,208,381 cycles # 2.754 GHz - 10,850,402,094 instructions # 1.91 insn per cycle - 2.057862471 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4274) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.422216e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.025606e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.025606e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.289197e+00 +- 1.809101e-01 ) GeV^0 +TOTAL : 1.579219 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 4,687,540,598 cycles:u # 2.951 GHz (74.87%) + 8,469,245 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.83%) + 1,829,658,675 stalled-cycles-backend:u # 39.03% backend cycles idle (74.87%) + 10,816,107,004 instructions:u # 2.31 insn per cycle + # 0.17 stalled cycles per insn (74.87%) + 1.593000379 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4259) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,82 +160,18 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645242873579 -Relative difference = 1.1028294269894893e-07 +Avg ME (F77/C++) = 4.3135650658514351 +Relative difference = 1.526612799754012e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.416639e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.866517e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.866517e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 2.022314 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,555,880,143 cycles # 2.741 GHz - 10,551,186,314 instructions # 1.90 insn per cycle - 2.027927255 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4138) (512y: 12) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645242873579 -Relative difference = 1.1028294269894893e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.322863e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.603781e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.603781e+05 ) 
sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 2.514102 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,668,008,181 cycles # 1.854 GHz - 8,659,615,849 instructions # 1.86 insn per cycle - 2.519706497 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 0) (512z: 2885) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313564e+00 -Avg ME (F77/C++) = 4.3135643536224961 -Relative difference = 8.197919301304478e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 3c9a7750d0..f201fc1612 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-09-18_19:29:46 -DATE: 2024-09-18_13:37:07 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.259037e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.833623e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.391198e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.533343 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.708542e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.083156e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.097984e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 +TOTAL : 0.442135 sec INFO: No Floating Point Exceptions have been reported - 2,205,791,107 cycles # 2.867 GHz - 3,166,074,888 instructions # 1.44 insn per cycle - 0.826367468 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe 
-p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,032,232,511 cycles:u # 2.193 GHz (73.99%) + 2,557,831 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.54%) + 7,206,774 stalled-cycles-backend:u # 0.70% backend cycles idle (74.51%) + 1,625,877,587 instructions:u # 1.58 insn per cycle + # 0.00 stalled cycles per insn (75.28%) + 0.509858346 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 
4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.526469e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.558963e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.558963e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.979629 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 20,509,216,850 cycles # 2.937 GHz - 51,923,869,243 instructions # 2.53 insn per cycle - 6.985125737 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.933291e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.972913e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.972913e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 
5.636784 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 17,161,131,724 cycles:u # 3.036 GHz (74.96%) + 35,183,856 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.95%) + 2,366,266,520 stalled-cycles-backend:u # 13.79% backend cycles idle (74.95%) + 51,682,146,779 instructions:u # 3.01 insn per cycle + # 0.05 stalled cycles per insn (75.01%) + 5.657081378 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,8 +86,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -113,24 +95,27 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.719239e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.833565e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.833565e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.966787 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 11,507,632,981 cycles # 2.897 GHz - 30,592,941,946 instructions # 2.66 insn per cycle - 3.972658763 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2972) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.443515e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.580774e+05 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.580774e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 3.257345 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 9,805,457,186 cycles:u # 2.996 GHz (74.89%) + 15,135,275 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.01%) + 3,040,971,113 stalled-cycles-backend:u # 31.01% backend cycles idle (75.07%) + 30,521,333,927 instructions:u # 3.11 insn per cycle + # 0.10 stalled cycles per insn (75.07%) + 3.277508059 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2927) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,8 +123,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -147,24 +132,27 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.525746e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.838241e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.838241e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.427006 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,694,021,096 cycles # 2.753 GHz - 13,606,483,540 instructions # 2.03 insn per cycle - 2.432521216 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3118) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.320550e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.767220e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.767220e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.871561 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 5,482,068,682 cycles:u # 2.906 GHz (74.99%) + 13,194,340 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.99%) + 1,326,892,594 stalled-cycles-backend:u # 24.20% backend cycles idle (75.02%) + 13,323,379,955 instructions:u # 2.43 insn per cycle + # 0.10 stalled cycles per insn (75.03%) + 1.891317443 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3019) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,8 +160,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -181,73 +169,9 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = 
VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.956630e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.333349e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.333349e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.225975 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,165,401,380 cycles # 2.764 GHz - 12,974,481,027 instructions # 2.10 insn per cycle - 2.231658259 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2851) (512y: 150) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) 
+/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.095455e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.237519e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.237519e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.497825 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,439,450,147 cycles # 1.839 GHz - 8,701,510,932 instructions # 1.35 insn per cycle - 3.503267717 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1792) (512y: 130) (512z: 2014) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index 008d0a9d35..c22be99f5f 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' 
-HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-09-18_19:30:01 -DATE: 2024-09-18_13:37:34 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.252482e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.819370e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.388849e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.535514 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.719205e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.142967e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.158724e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.088120e+00 +- 
1.629041e-01 ) GeV^0 +TOTAL : 0.432342 sec INFO: No Floating Point Exceptions have been reported - 2,214,143,603 cycles # 2.876 GHz - 3,159,539,235 instructions # 1.43 insn per cycle - 0.828878265 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 968,936,180 cycles:u # 2.116 GHz (74.90%) + 2,476,132 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.64%) + 5,655,333 stalled-cycles-backend:u # 0.58% backend cycles idle (75.65%) + 1,567,897,246 instructions:u # 1.62 insn per cycle + # 0.00 stalled cycles per insn (73.82%) + 0.495612886 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.608272e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.644385e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.644385e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.629223 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 19,498,919,287 cycles # 2.939 GHz - 49,953,158,127 instructions # 2.56 insn per cycle - 6.634747708 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 599) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.136897e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.186073e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.186073e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 5.117671 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 15,609,174,700 cycles:u # 3.041 GHz (74.91%) + 32,659,892 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.98%) + 59,265,580 stalled-cycles-backend:u # 0.38% backend cycles idle (75.06%) + 49,816,715,411 instructions:u # 3.19 insn per cycle + # 0.00 stalled cycles per insn (75.06%) + 5.137159828 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 652) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,8 +86,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -113,24 +95,27 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
-Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.887478e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.016037e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.016037e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.741933 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 11,045,759,193 cycles # 2.948 GHz - 29,138,468,069 instructions # 2.64 insn per cycle - 3.747566884 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2815) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.526138e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.669586e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.669586e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 3.185940 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 9,543,011,529 cycles:u # 2.981 GHz (75.01%) + 15,702,421 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.01%) + 1,892,189,851 stalled-cycles-backend:u # 19.83% backend cycles idle (75.02%) + 29,001,613,634 instructions:u # 3.04 insn per cycle + # 0.07 stalled cycles per insn (75.03%) + 3.205548627 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2723) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,8 +123,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -147,24 +132,27 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.735821e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.946793e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.946793e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.917806 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 8,064,126,572 cycles # 2.759 GHz - 15,188,166,070 instructions # 1.88 insn per cycle - 2.923408860 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3203) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.204840e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.502191e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.502191e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.226867 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,562,119,728 cycles:u # 2.926 GHz (75.04%) + 19,235,879 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.03%) + 2,242,235,753 stalled-cycles-backend:u # 34.17% backend cycles idle (75.03%) + 15,061,934,031 instructions:u # 2.30 insn per cycle + # 0.15 stalled cycles per insn (74.89%) + 2.246971422 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,8 +160,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -181,73 +169,9 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = 
MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.934941e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.167424e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.167424e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.773801 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,685,843,393 cycles # 2.766 GHz - 14,482,526,269 instructions # 1.88 insn per cycle - 2.779397074 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2775) (512y: 304) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 
1.7806676491157786e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.028557e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.163339e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.163339e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.571755 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,530,752,454 cycles # 1.826 GHz - 9,894,967,129 instructions # 1.52 insn per cycle - 3.577461945 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1565) (512y: 216) (512z: 2216) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 052ae7ee83..2c146f6b15 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= 
FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-09-18_19:28:17 -DATE: 2024-09-18_13:34:22 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.764082e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.781890e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.785142e+04 ) sec^-1 
-MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.472813 sec -INFO: No Floating Point Exceptions have been reported - 1,988,958,737 cycles # 2.864 GHz - 2,937,434,860 instructions # 1.48 insn per cycle - 0.752740146 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 6.668468e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.143595e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.145469e+03 ) sec^-1 +MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 +TOTAL : 0.439278 sec +INFO: No Floating Point Exceptions have been reported + 1,037,942,369 cycles:u # 2.354 GHz (75.69%) + 2,302,219 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.65%) + 5,992,381 stalled-cycles-backend:u # 0.58% backend cycles idle (75.86%) + 1,587,348,327 instructions:u # 1.53 insn per cycle + # 0.00 stalled cycles per insn (74.08%) + 0.494244162 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.003017e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.119483e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.127951e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.489631 sec -INFO: No Floating Point Exceptions have been reported - 2,045,084,255 cycles # 2.869 GHz - 3,023,069,261 instructions # 1.48 insn per cycle - 0.771335484 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.119449e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.268041e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.268542e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 +TOTAL : 0.450274 sec +INFO: No Floating Point Exceptions have been reported + 1,125,286,471 cycles:u # 2.408 GHz (74.88%) + 2,461,221 stalled-cycles-frontend:u # 0.22% frontend cycles idle (76.05%) + 10,321,273 stalled-cycles-backend:u # 0.92% backend cycles idle (75.21%) + 1,590,720,369 instructions:u # 1.41 insn per cycle + # 0.01 stalled cycles per insn (74.40%) + 0.505235841 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176604E-006 -Relative difference = 3.3392753366481633e-07 +Avg ME (F77/GPU) = 8.1274562860176587E-006 +Relative difference = 3.3392753387325367e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.395968e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.399199e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.399199e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.157638 sec -INFO: No Floating Point Exceptions have been reported - 469,190,775 cycles # 2.915 GHz - 1,389,792,831 instructions # 2.96 insn per cycle - 0.161480291 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3908) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.512477e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.517340e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.517340e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.118985 sec +INFO: No Floating Point Exceptions have been reported + 363,156,268 cycles:u # 2.984 GHz (73.84%) + 34,260 stalled-cycles-frontend:u # 0.01% frontend cycles idle 
(73.74%) + 40,619,768 stalled-cycles-backend:u # 11.19% backend cycles idle (73.74%) + 1,331,399,379 instructions:u # 3.67 insn per cycle + # 0.03 stalled cycles per insn (73.74%) + 0.125991835 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1627) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative 
difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.497864e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.511372e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.511372e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.083349 sec -INFO: No Floating Point Exceptions have been reported - 241,222,273 cycles # 2.780 GHz - 693,002,253 instructions # 2.87 insn per cycle - 0.087370180 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9482) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.791838e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.809548e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.809548e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.062074 sec +INFO: No Floating Point Exceptions have been reported + 194,932,402 cycles:u # 3.011 GHz (75.11%) + 30,067 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.35%) + 22,522,994 stalled-cycles-backend:u # 11.55% 
backend cycles idle (75.35%) + 668,460,952 instructions:u # 3.43 insn per cycle + # 0.03 stalled cycles per insn (75.35%) + 0.069048897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 8749) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference 
<= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.431164e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.437397e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.437397e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.038955 sec -INFO: No Floating Point Exceptions have been reported - 115,308,474 cycles # 2.709 GHz - 257,920,071 instructions # 2.24 insn per cycle - 0.043236547 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8501) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.870399e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.878181e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.878181e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.030271 sec +INFO: No Floating Point Exceptions have been reported + 89,416,327 cycles:u # 2.716 GHz (76.20%) + 71,315 stalled-cycles-frontend:u # 0.08% frontend cycles idle (75.83%) + 11,520,109 stalled-cycles-backend:u # 12.88% backend cycles idle (75.83%) + 238,428,202 instructions:u # 
2.67 insn per cycle + # 0.05 stalled cycles per insn (75.82%) + 0.037318574 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7869) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.580017e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.587312e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.587312e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.035473 sec -INFO: No Floating Point Exceptions have been reported - 102,969,893 cycles # 2.655 GHz - 240,051,517 instructions # 2.33 insn per cycle - 0.039391596 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8143) (512y: 150) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.194413e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.199659e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.199659e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 
) GeV^-4 -TOTAL : 0.046450 sec -INFO: No Floating Point Exceptions have been reported - 90,344,224 cycles # 1.811 GHz - 134,320,028 instructions # 1.49 insn per cycle - 0.050486009 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1943) (512y: 126) (512z: 7086) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index a192f75604..399355ad74 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-09-18_19:28:23 -DATE: 2024-09-18_13:34:33 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.801517e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.819462e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.822714e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.476112 sec -INFO: No Floating Point Exceptions have been reported - 2,002,117,199 cycles # 2.852 GHz - 2,866,160,766 instructions # 1.43 insn per cycle - 0.760468280 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+EvtsPerSec[Rmb+ME] (23) = ( 6.984818e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.488022e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.490089e+03 ) sec^-1 +MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 +TOTAL : 0.418542 sec +INFO: No Floating Point Exceptions have been reported + 1,027,313,403 cycles:u # 2.363 GHz (75.92%) + 2,492,833 stalled-cycles-frontend:u # 0.24% frontend cycles idle (76.76%) + 5,138,161 stalled-cycles-backend:u # 0.50% backend cycles idle (76.29%) + 1,497,202,732 instructions:u # 1.46 insn per cycle + # 0.00 stalled cycles per insn (75.95%) + 0.474829105 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.078707e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.191865e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.200143e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.489613 sec -INFO: No Floating Point Exceptions have been reported - 2,042,241,108 cycles # 2.868 GHz - 2,998,598,647 instructions # 1.47 insn per cycle - 0.772399138 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 
2.150169e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.294351e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.294874e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 +TOTAL : 0.447753 sec +INFO: No Floating Point Exceptions have been reported + 1,107,969,800 cycles:u # 2.380 GHz (75.88%) + 2,409,302 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.95%) + 6,691,932 stalled-cycles-backend:u # 0.60% backend cycles idle (75.98%) + 1,616,550,803 instructions:u # 1.46 insn per cycle + # 0.00 stalled cycles per insn (73.95%) + 0.503491277 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176604E-006 -Relative difference = 3.3392753366481633e-07 +Avg ME (F77/GPU) = 8.1274562860176587E-006 +Relative difference = 3.3392753387325367e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.407520e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.410948e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.410948e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.156563 sec -INFO: No Floating Point Exceptions have been reported - 466,584,758 cycles # 2.920 GHz - 1,385,250,664 instructions # 2.97 insn per cycle - 0.160376464 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3796) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.524239e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.529078e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.529078e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.118128 sec +INFO: No Floating Point Exceptions have been reported + 368,723,264 cycles:u # 3.053 GHz (72.37%) + 30,919 stalled-cycles-frontend:u # 0.01% frontend cycles idle (73.55%) + 47,382,382 stalled-cycles-backend:u # 12.85% backend cycles idle (73.55%) + 1,330,184,390 instructions:u # 3.61 insn per cycle + # 0.04 stalled cycles per insn (73.55%) + 0.125307087 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1597) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.449696e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.462962e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.462962e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.083015 sec -INFO: No Floating Point Exceptions have been reported - 239,636,465 cycles # 2.770 GHz - 689,080,119 instructions # 2.88 insn per cycle - 0.087201828 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9525) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.773195e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.790673e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.790673e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.061655 sec +INFO: No Floating Point Exceptions have been reported + 184,751,906 cycles:u # 2.876 GHz (75.16%) + 49,149 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.16%) + 20,265,437 stalled-cycles-backend:u # 10.97% backend cycles idle (75.16%) + 666,237,753 instructions:u # 3.61 insn per cycle + # 0.03 stalled cycles per insn (75.16%) + 0.068873648 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 8794) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.414254e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.419861e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.419861e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.038646 sec -INFO: No Floating Point Exceptions have been reported - 111,994,100 cycles # 2.669 GHz - 253,518,298 instructions # 2.26 insn per cycle - 0.042520952 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8457) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.869543e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.877173e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.877173e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.029734 sec +INFO: No Floating Point Exceptions have been reported + 86,265,079 cycles:u # 2.667 GHz (75.40%) + 21,439 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.40%) + 9,672,702 stalled-cycles-backend:u # 11.21% backend cycles idle (75.40%) + 235,809,484 instructions:u # 2.73 insn per cycle + # 0.04 stalled cycles per insn (75.40%) + 0.036725984 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7839) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.642367e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.650155e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.650155e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.033489 sec -INFO: No Floating Point Exceptions have been reported - 100,655,003 cycles # 2.733 GHz - 235,667,417 instructions # 2.34 insn per cycle - 0.037423166 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8101) (512y: 150) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.198873e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.203973e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.203973e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 
) GeV^-4 -TOTAL : 0.045540 sec -INFO: No Floating Point Exceptions have been reported - 88,110,981 cycles # 1.799 GHz - 129,713,745 instructions # 1.47 insn per cycle - 0.049588057 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1899) (512y: 126) (512z: 7084) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 0a43242226..8963676ee4 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-09-18_19:28:29 -DATE: 2024-09-18_13:34:44 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.214942e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.224129e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.226295e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 -TOTAL : 0.475163 sec -INFO: No Floating Point Exceptions have been reported - 1,995,760,495 cycles # 2.876 GHz - 2,898,607,116 instructions # 1.45 insn per cycle - 0.751350588 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+EvtsPerSec[Rmb+ME] (23) = ( 1.154909e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.304119e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.304678e+04 ) sec^-1 +MeanMatrixElemValue = ( 3.100225e-04 +- 2.256521e-04 ) GeV^-4 +TOTAL : 0.410468 sec +INFO: No Floating Point Exceptions have been reported + 968,020,432 cycles:u # 2.381 GHz (74.32%) + 2,566,500 stalled-cycles-frontend:u # 0.27% frontend cycles idle (73.57%) + 8,893,630 stalled-cycles-backend:u # 0.92% backend cycles idle (74.04%) + 1,495,841,709 instructions:u # 1.55 insn per cycle + # 0.01 stalled cycles per insn (75.32%) + 0.465948790 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.954269e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.031370e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.039107e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.020494e-03 +- 4.025605e-03 ) GeV^-4 -TOTAL : 0.476219 sec -INFO: No Floating Point Exceptions have been reported - 1,999,149,645 cycles # 2.878 GHz - 2,913,422,324 instructions # 1.46 insn per cycle - 0.751593441 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 
3.840077e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.357410e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.358906e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.043589e-02 +- 5.707640e-02 ) GeV^-4 +TOTAL : 0.409687 sec +INFO: No Floating Point Exceptions have been reported + 1,012,721,169 cycles:u # 2.379 GHz (75.55%) + 2,506,424 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.57%) + 5,337,641 stalled-cycles-backend:u # 0.53% backend cycles idle (75.67%) + 1,521,579,474 instructions:u # 1.50 insn per cycle + # 0.00 stalled cycles per insn (73.13%) + 0.462809781 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127250e-06 -Avg ME (F77/GPU) = 8.1272869669930272E-006 -Relative difference = 4.548524165778887e-06 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 8.127375e-06 +Avg ME (F77/GPU) = 8.1275160277913510E-006 +Relative difference = 1.735219444797551e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 
2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.411294e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.414706e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.414706e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.156868 sec -INFO: No Floating Point Exceptions have been reported - 464,525,374 cycles # 2.900 GHz - 1,382,008,460 instructions # 2.98 insn per cycle - 0.160803882 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.665055e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.670106e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.670106e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 +TOTAL : 0.115176 sec +INFO: No Floating Point Exceptions have been reported + 349,347,512 cycles:u # 2.966 GHz (72.87%) + 36,350 stalled-cycles-frontend:u # 0.01% frontend cycles idle (72.87%) + 46,649,985 stalled-cycles-backend:u # 13.35% backend cycles idle (72.20%) + 1,343,502,173 instructions:u # 3.85 insn per cycle + # 0.03 stalled cycles per insn (75.59%) + 0.122057755 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1635) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127811e-06 -Avg ME (F77/C++) = 8.1278105271212486E-006 -Relative difference = 5.8180333155894157e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127810e-06 +Avg ME (F77/C++) = 8.1278101435899343E-006 +Relative difference = 1.76664974860306e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.203598e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.208165e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.208165e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.045866 sec -INFO: No Floating Point Exceptions have been reported - 133,138,155 cycles # 2.706 GHz - 372,169,369 instructions # 2.80 insn per cycle - 0.049817482 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:10141) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.658219e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.664629e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.664629e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 +TOTAL : 0.033799 sec +INFO: No Floating Point Exceptions have been reported + 111,754,632 cycles:u # 3.069 GHz (66.33%) + 25,690 stalled-cycles-frontend:u # 0.02% frontend cycles idle (78.14%) + 15,926,909 stalled-cycles-backend:u # 14.25% backend cycles idle (78.13%) + 350,152,006 instructions:u # 3.13 insn per cycle + # 0.05 stalled cycles per insn (78.14%) + 0.040914810 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9270) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127809e-06 -Avg ME (F77/C++) = 8.1278090510674588E-006 -Relative difference = 6.2830535070193674e-09 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127807e-06 +Avg ME (F77/C++) = 8.1278071402353976E-006 +Relative difference = 1.725378052944308e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.784499e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.809977e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.809977e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.020924 sec -INFO: No Floating Point Exceptions have been reported - 65,424,959 cycles # 2.700 GHz - 142,812,066 instructions # 2.18 insn per cycle - 0.024819725 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9241) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.616514e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.647967e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.647967e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 +TOTAL : 0.016569 sec +INFO: No Floating Point Exceptions have been reported + 47,284,090 cycles:u # 2.458 GHz (61.55%) + 22,430 stalled-cycles-frontend:u # 0.05% frontend cycles idle (58.79%) + 4,344,587 stalled-cycles-backend:u # 9.19% backend cycles idle (58.78%) + 130,336,025 instructions:u # 2.76 insn per cycle + # 0.03 stalled cycles per insn (67.09%) + 0.023718224 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8628) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127535e-06 +Avg ME (F77/C++) = 8.1275351122593251E-006 +Relative difference = 1.3812222848044195e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.962557e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.993097e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.993097e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.019867 sec -INFO: No Floating Point Exceptions have been reported - 60,581,334 cycles # 2.611 GHz - 132,865,474 instructions # 2.19 insn per cycle - 0.023738141 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8959) (512y: 28) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.316896e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.339579e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.339579e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 
) GeV^-4 -TOTAL : 0.024992 sec -INFO: No Floating Point Exceptions have been reported - 52,575,011 cycles # 1.850 GHz - 79,563,519 instructions # 1.51 insn per cycle - 0.029028726 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2836) (512y: 30) (512z: 7437) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275369863475849E-006 -Relative difference = 1.6797726498700304e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index 81fec428b9..7cdd584b43 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-09-18_19:28:35 -DATE: 2024-09-18_13:34:55 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.237744e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.247254e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.249233e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 -TOTAL : 0.475021 sec -INFO: No Floating Point Exceptions have been reported - 1,993,743,022 cycles # 2.872 GHz - 2,918,324,117 instructions # 1.46 insn per cycle - 0.750958800 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+EvtsPerSec[Rmb+ME] (23) = ( 1.144180e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.289182e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.289765e+04 ) sec^-1 +MeanMatrixElemValue = ( 3.100225e-04 +- 2.256521e-04 ) GeV^-4 +TOTAL : 0.387363 sec +INFO: No Floating Point Exceptions have been reported + 957,531,938 cycles:u # 2.375 GHz (75.15%) + 2,450,003 stalled-cycles-frontend:u # 0.26% frontend cycles idle (76.17%) + 6,366,991 stalled-cycles-backend:u # 0.66% backend cycles idle (76.29%) + 1,403,761,770 instructions:u # 1.47 insn per cycle + # 0.00 stalled cycles per insn (76.27%) + 0.442190638 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.067375e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.148140e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.156186e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.020496e-03 +- 4.025606e-03 ) GeV^-4 -TOTAL : 0.476461 sec -INFO: No Floating Point Exceptions have been reported - 1,993,725,610 cycles # 2.868 GHz - 2,900,779,066 instructions # 1.45 insn per cycle - 0.752726088 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 
3.815796e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.310260e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.311812e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.043589e-02 +- 5.707640e-02 ) GeV^-4 +TOTAL : 0.410025 sec +INFO: No Floating Point Exceptions have been reported + 1,018,395,407 cycles:u # 2.395 GHz (75.25%) + 2,460,070 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.36%) + 7,419,542 stalled-cycles-backend:u # 0.73% backend cycles idle (74.27%) + 1,529,000,005 instructions:u # 1.50 insn per cycle + # 0.00 stalled cycles per insn (73.41%) + 0.466391689 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127250e-06 -Avg ME (F77/GPU) = 8.1272866419447706E-006 -Relative difference = 4.508529302013153e-06 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 8.127375e-06 +Avg ME (F77/GPU) = 8.1275164883853706E-006 +Relative difference = 1.740886637704508e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 
2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.438799e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.442175e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.442175e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.155006 sec -INFO: No Floating Point Exceptions have been reported - 462,147,018 cycles # 2.920 GHz - 1,376,798,562 instructions # 2.98 insn per cycle - 0.158894971 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2930) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.661400e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.666444e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.666444e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 +TOTAL : 0.114706 sec +INFO: No Floating Point Exceptions have been reported + 352,310,785 cycles:u # 3.003 GHz (73.30%) + 32,201 stalled-cycles-frontend:u # 0.01% frontend cycles idle (72.77%) + 39,110,027 stalled-cycles-backend:u # 11.10% backend cycles idle (72.77%) + 1,325,206,070 instructions:u # 3.76 insn per cycle + # 0.03 stalled cycles per insn (72.95%) + 0.121742553 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1608) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127811e-06 -Avg ME (F77/C++) = 8.1278105271212486E-006 -Relative difference = 5.8180333155894157e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127810e-06 +Avg ME (F77/C++) = 8.1278101435899343E-006 +Relative difference = 1.76664974860306e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.224501e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.229267e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.229267e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.044280 sec -INFO: No Floating Point Exceptions have been reported - 130,643,774 cycles # 2.744 GHz - 367,253,267 instructions # 2.81 insn per cycle - 0.048214582 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:10124) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.663127e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.669548e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.669548e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 +TOTAL : 0.033155 sec +INFO: No Floating Point Exceptions have been reported + 107,195,720 cycles:u # 2.992 GHz (69.15%) + 20,939 stalled-cycles-frontend:u # 0.02% frontend cycles idle (77.78%) + 15,459,235 stalled-cycles-backend:u # 14.42% backend cycles idle (77.78%) + 347,025,294 instructions:u # 3.24 insn per cycle + # 0.04 stalled cycles per insn (77.78%) + 0.040164534 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9253) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127809e-06 -Avg ME (F77/C++) = 8.1278090510674588E-006 -Relative difference = 6.2830535070193674e-09 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127807e-06 +Avg ME (F77/C++) = 8.1278071402353976E-006 +Relative difference = 1.725378052944308e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.785213e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.809806e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.809806e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.020207 sec -INFO: No Floating Point Exceptions have been reported - 63,247,605 cycles # 2.692 GHz - 138,006,301 instructions # 2.18 insn per cycle - 0.024065097 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9196) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.612156e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.642468e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.642468e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 +TOTAL : 0.016083 sec +INFO: No Floating Point Exceptions have been reported + 48,800,876 cycles:u # 2.613 GHz (62.04%) + 19,322 stalled-cycles-frontend:u # 0.04% frontend cycles idle (57.57%) + 4,617,313 stalled-cycles-backend:u # 9.46% backend cycles idle (57.56%) + 126,177,930 instructions:u # 2.59 insn per cycle + # 0.04 stalled cycles per insn (63.92%) + 0.022738865 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127535e-06 +Avg ME (F77/C++) = 8.1275351122593251E-006 +Relative difference = 1.3812222848044195e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.053192e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.081685e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.081685e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.018541 sec -INFO: No Floating Point Exceptions have been reported - 58,249,945 cycles # 2.668 GHz - 127,981,629 instructions # 2.20 insn per cycle - 0.022408862 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8910) (512y: 28) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.336383e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.358299e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.358299e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 
) GeV^-4 -TOTAL : 0.024035 sec -INFO: No Floating Point Exceptions have been reported - 50,478,559 cycles # 1.838 GHz - 74,763,022 instructions # 1.48 insn per cycle - 0.028059996 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2791) (512y: 30) (512z: 7439) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275369863475849E-006 -Relative difference = 1.6797726498700304e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 59d9b0aed3..b711462f1b 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-09-18_19:28:40 -DATE: 2024-09-18_13:35:06 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.754823e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.776415e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.779447e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.471377 sec -INFO: No Floating Point Exceptions have been reported - 1,997,323,985 cycles # 2.874 GHz - 2,899,694,458 instructions # 1.45 insn per cycle - 0.752307454 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+EvtsPerSec[Rmb+ME] (23) = ( 6.661949e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.199247e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.201096e+03 ) sec^-1 +MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 +TOTAL : 0.417312 sec +INFO: No Floating Point Exceptions have been reported + 1,062,085,104 cycles:u # 2.456 GHz (76.43%) + 2,355,484 stalled-cycles-frontend:u # 0.22% frontend cycles idle (76.05%) + 6,266,104 stalled-cycles-backend:u # 0.59% backend cycles idle (74.78%) + 1,591,646,638 instructions:u # 1.50 insn per cycle + # 0.00 stalled cycles per insn (73.98%) + 0.466599491 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.948061e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.061017e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.069565e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.489196 sec -INFO: No Floating Point Exceptions have been reported - 2,036,665,309 cycles # 2.870 GHz - 3,021,584,007 instructions # 1.48 insn per cycle - 0.771275990 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 
2.124047e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.274352e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.274851e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 +TOTAL : 0.471017 sec +INFO: No Floating Point Exceptions have been reported + 1,152,786,923 cycles:u # 2.462 GHz (74.79%) + 2,473,272 stalled-cycles-frontend:u # 0.21% frontend cycles idle (76.01%) + 5,503,635 stalled-cycles-backend:u # 0.48% backend cycles idle (76.15%) + 1,578,639,124 instructions:u # 1.37 insn per cycle + # 0.00 stalled cycles per insn (76.21%) + 0.521582548 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405200E-006 -Relative difference = 3.3369094561706885e-07 +Avg ME (F77/GPU) = 8.1274562879405183E-006 +Relative difference = 3.336909458255062e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.346871e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.350343e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.350343e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.160031 sec -INFO: No Floating Point Exceptions have been reported - 472,933,421 cycles # 2.893 GHz - 1,398,381,136 instructions # 2.96 insn per cycle - 0.164085482 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3899) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.550757e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.555611e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.555611e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.118022 sec +INFO: No Floating Point Exceptions have been reported + 362,163,408 cycles:u # 3.001 GHz (73.88%) + 38,253 stalled-cycles-frontend:u # 0.01% frontend cycles idle (73.52%) + 38,323,839 stalled-cycles-backend:u # 10.58% backend cycles idle (73.52%) + 1,339,885,187 instructions:u # 3.70 insn per cycle + # 0.03 stalled cycles per insn (73.52%) + 0.125046952 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1630) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.641661e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.653702e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.653702e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.081430 sec -INFO: No Floating Point Exceptions have been reported - 237,272,954 cycles # 2.797 GHz - 688,192,491 instructions # 2.90 insn per cycle - 0.085340914 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9334) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.867085e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.884293e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.884293e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.061531 sec +INFO: No Floating Point Exceptions have been reported + 184,178,339 cycles:u # 2.873 GHz (75.11%) + 30,115 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.11%) + 23,284,087 stalled-cycles-backend:u # 12.64% backend cycles idle (75.11%) + 664,412,984 instructions:u # 3.61 insn per cycle + # 0.04 stalled cycles per insn (75.11%) + 0.068525162 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 8728) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.416781e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.422580e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.422580e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.039309 sec -INFO: No Floating Point Exceptions have been reported - 114,214,565 cycles # 2.672 GHz - 253,122,283 instructions # 2.22 insn per cycle - 0.043386095 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8363) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.903962e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.912090e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.912090e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.029761 sec +INFO: No Floating Point Exceptions have been reported + 94,108,055 cycles:u # 2.906 GHz (75.81%) + 26,438 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.43%) + 10,714,661 stalled-cycles-backend:u # 11.39% backend cycles idle (75.43%) + 234,365,644 instructions:u # 2.49 insn per cycle + # 0.05 stalled cycles per insn (75.43%) + 0.036588127 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7892) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.596060e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.604256e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.604256e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.035081 sec -INFO: No Floating Point Exceptions have been reported - 101,856,642 cycles # 2.646 GHz - 233,656,157 instructions # 2.29 insn per cycle - 0.039147600 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7501) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.146549e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.151691e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.151691e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) 
GeV^-4 -TOTAL : 0.048254 sec -INFO: No Floating Point Exceptions have been reported - 91,587,165 cycles # 1.768 GHz - 133,174,500 instructions # 1.45 insn per cycle - 0.052446048 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2084) (512y: 122) (512z: 6354) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index 6686b30b4b..2b7efd8bd5 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-09-18_19:28:46 -DATE: 2024-09-18_13:35:17 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.784162e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.808686e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.813201e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.333667 sec -INFO: No Floating Point Exceptions have been reported - 1,240,871,647 cycles # 2.848 GHz - 2,449,109,840 instructions # 1.97 insn per cycle - 0.615625688 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% 
+EvtsPerSec[Rmb+ME] (23) = ( 6.969887e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.486074e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.488127e+03 ) sec^-1 +MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 +TOTAL : 0.441981 sec +INFO: No Floating Point Exceptions have been reported + 1,017,798,254 cycles:u # 2.342 GHz (76.18%) + 2,308,984 stalled-cycles-frontend:u # 0.23% frontend cycles idle (76.10%) + 6,452,844 stalled-cycles-backend:u # 0.63% backend cycles idle (75.85%) + 1,567,263,249 instructions:u # 1.54 insn per cycle + # 0.00 stalled cycles per insn (72.74%) + 0.493323757 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.062992e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.177330e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.186889e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.492547 sec -INFO: No Floating Point Exceptions have been reported - 2,036,892,131 cycles # 2.842 GHz - 3,016,344,751 instructions # 1.48 insn per cycle - 0.776057336 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 
2.157579e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.308611e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.309138e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 +TOTAL : 0.467491 sec +INFO: No Floating Point Exceptions have been reported + 1,116,307,170 cycles:u # 2.398 GHz (75.15%) + 2,467,208 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.96%) + 13,793,394 stalled-cycles-backend:u # 1.24% backend cycles idle (75.35%) + 1,640,893,198 instructions:u # 1.47 insn per cycle + # 0.01 stalled cycles per insn (73.85%) + 0.524297744 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405200E-006 -Relative difference = 3.3369094561706885e-07 +Avg ME (F77/GPU) = 8.1274562879405183E-006 +Relative difference = 3.336909458255062e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.396244e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.399563e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.399563e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.156900 sec -INFO: No Floating Point Exceptions have been reported - 468,878,349 cycles # 2.927 GHz - 1,393,744,642 instructions # 2.97 insn per cycle - 0.160773641 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3800) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.525460e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.530238e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.530238e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.118122 sec +INFO: No Floating Point Exceptions have been reported + 360,120,470 cycles:u # 2.981 GHz (73.64%) + 31,355 stalled-cycles-frontend:u # 0.01% frontend cycles idle (73.55%) + 44,381,360 stalled-cycles-backend:u # 12.32% backend cycles idle (73.55%) + 1,339,239,764 instructions:u # 3.72 insn per cycle + # 0.03 stalled cycles per insn (73.55%) + 0.124960214 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.703638e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.716215e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.716215e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.080002 sec -INFO: No Floating Point Exceptions have been reported - 235,588,650 cycles # 2.827 GHz - 684,259,138 instructions # 2.90 insn per cycle - 0.083821193 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9368) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.937216e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.955824e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.955824e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.060516 sec +INFO: No Floating Point Exceptions have been reported + 187,016,868 cycles:u # 2.962 GHz (75.81%) + 25,537 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.73%) + 22,517,025 stalled-cycles-backend:u # 12.04% backend cycles idle (74.73%) + 661,818,506 instructions:u # 3.54 insn per cycle + # 0.03 stalled cycles per insn (74.73%) + 0.067401522 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 8787) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.433569e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.439450e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.439450e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.038120 sec -INFO: No Floating Point Exceptions have been reported - 111,841,703 cycles # 2.696 GHz - 248,650,538 instructions # 2.22 insn per cycle - 0.042017351 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8316) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.897525e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.905614e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.905614e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.029335 sec +INFO: No Floating Point Exceptions have been reported + 85,082,223 cycles:u # 2.666 GHz (75.07%) + 26,017 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.07%) + 11,186,343 stalled-cycles-backend:u # 13.15% backend cycles idle (75.06%) + 231,761,669 instructions:u # 2.72 insn per cycle + # 0.05 stalled cycles per insn (75.07%) + 0.036167421 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.614208e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.621785e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.621785e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.034000 sec -INFO: No Floating Point Exceptions have been reported - 99,535,427 cycles # 2.668 GHz - 229,238,314 instructions # 2.30 insn per cycle - 0.037858332 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7452) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.195361e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.200436e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.200436e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) 
GeV^-4 -TOTAL : 0.045690 sec -INFO: No Floating Point Exceptions have been reported - 89,777,680 cycles # 1.821 GHz - 128,604,385 instructions # 1.43 insn per cycle - 0.049950768 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2035) (512y: 122) (512z: 6355) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 62aa2351ef..dc1aa58764 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-09-18_19:27:38 -DATE: 2024-09-18_13:33:10 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.107848e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.349751e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.801252e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.525011 sec -INFO: No Floating Point Exceptions have been reported - 2,191,922,668 cycles # 2.881 GHz - 3,124,854,662 instructions # 1.43 insn per cycle - 0.820527123 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.263984e+07 ) 
sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.167835e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.215993e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 +TOTAL : 0.402908 sec +INFO: No Floating Point Exceptions have been reported + 790,493,625 cycles:u # 1.906 GHz (74.68%) + 2,335,099 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.43%) + 6,129,545 stalled-cycles-backend:u # 0.78% backend cycles idle (76.53%) + 1,422,912,336 instructions:u # 1.80 insn per cycle + # 0.00 stalled cycles per insn (74.72%) + 0.465147442 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 Relative difference = 2.590743366698123e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.117531e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.040993e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.040993e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.263727 sec -INFO: No Floating Point Exceptions have been reported - 3,735,375,700 cycles # 2.944 GHz - 9,727,971,651 instructions # 2.60 insn per cycle - 1.269703149 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.038360e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.166278e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.166278e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 1.225705 sec +INFO: No Floating Point Exceptions have been reported + 3,452,727,402 cycles:u # 2.783 GHz (75.04%) + 8,530,310 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.88%) + 12,832,682 stalled-cycles-backend:u # 0.37% backend cycles idle (74.91%) + 9,634,837,721 instructions:u # 2.79 insn per cycle + # 0.00 stalled cycles per insn (74.92%) + 1.245159412 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 332) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.512691e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.947484e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.947484e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.807869 sec -INFO: No Floating Point Exceptions have been reported - 2,332,400,363 cycles # 2.869 GHz - 5,932,883,831 instructions # 2.54 insn per cycle - 0.813712795 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1369) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.946379e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.485940e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.485940e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.754874 sec +INFO: No Floating Point Exceptions have been reported + 1,995,700,116 cycles:u # 2.590 GHz (75.09%) + 8,309,863 stalled-cycles-frontend:u # 0.42% frontend cycles idle (75.09%) + 9,001,605 stalled-cycles-backend:u # 0.45% backend cycles idle (75.09%) + 5,915,981,455 instructions:u # 2.96 insn per cycle + # 0.00 stalled cycles per insn (75.19%) + 0.775038093 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1321) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.185960e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.183533e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.183533e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.594661 sec -INFO: No Floating Point Exceptions have been reported - 1,663,371,411 cycles # 2.773 GHz - 3,314,486,720 instructions # 1.99 insn per cycle - 0.600516021 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1499) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.023188e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.415258e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.415258e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.561936 sec +INFO: No Floating Point Exceptions have been reported + 1,401,006,195 cycles:u # 2.425 GHz (75.10%) + 8,413,066 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.08%) + 18,327,903 stalled-cycles-backend:u # 1.31% backend cycles idle (75.08%) + 3,284,856,607 instructions:u # 2.34 insn per cycle + # 0.01 stalled cycles per insn (75.20%) + 0.582102691 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1468) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.219367e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.251513e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.251513e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 
3.293564e-05 ) GeV^0 -TOTAL : 0.587632 sec -INFO: No Floating Point Exceptions have been reported - 1,614,839,496 cycles # 2.724 GHz - 3,284,546,277 instructions # 2.03 insn per cycle - 0.593339482 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1375) (512y: 96) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.129616e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.055946e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.055946e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.606967 sec -INFO: No Floating Point Exceptions have been reported - 1,366,903,692 cycles # 2.234 GHz - 2,424,948,880 instructions # 1.77 insn per cycle - 0.612713832 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 580) (512y: 60) (512z: 1021) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index 239bb47b8a..4a0aff8aa2 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-09-18_19:27:45 -DATE: 2024-09-18_13:33:22 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.181260e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.490249e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.991797e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.525256 sec -INFO: No Floating Point Exceptions have been reported - 2,186,851,153 cycles # 2.864 GHz - 3,107,286,620 instructions # 1.42 insn per cycle - 0.822741231 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 
1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.301680e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.173188e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.222396e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 +TOTAL : 0.384632 sec +INFO: No Floating Point Exceptions have been reported + 846,707,623 cycles:u # 2.055 GHz (74.90%) + 2,390,529 stalled-cycles-frontend:u # 0.28% frontend cycles idle (76.43%) + 7,195,648 stalled-cycles-backend:u # 0.85% backend cycles idle (75.75%) + 1,429,395,666 instructions:u # 1.69 insn per cycle + # 0.01 stalled cycles per insn (74.83%) + 0.449705601 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 Relative difference = 2.590743366698123e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.043560e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.033362e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.033362e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.273177 sec -INFO: No Floating Point Exceptions have been reported - 3,715,871,529 cycles # 2.906 GHz - 9,610,590,320 instructions # 2.59 insn per cycle - 1.279195540 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.037912e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.165841e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.165841e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 1.224624 sec +INFO: No Floating Point Exceptions have been reported + 3,447,640,527 cycles:u # 2.779 GHz (74.92%) + 9,115,739 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.91%) + 13,796,381 stalled-cycles-backend:u # 0.40% backend cycles idle (74.91%) + 9,579,016,894 instructions:u # 2.78 insn per cycle + # 0.00 stalled cycles per insn (74.86%) + 1.245115418 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 342) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.470593e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.877997e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.877997e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.826368 sec -INFO: No Floating Point Exceptions have been reported - 2,333,894,912 cycles # 2.807 GHz - 5,878,357,831 instructions # 2.52 insn per cycle - 0.832251124 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1340) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.983369e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.535409e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.535409e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.740665 sec +INFO: No Floating Point Exceptions have been reported + 2,003,614,020 cycles:u # 2.649 GHz (74.73%) + 8,694,148 stalled-cycles-frontend:u # 0.43% frontend cycles idle (74.64%) + 10,670,384 stalled-cycles-backend:u # 0.53% backend cycles idle (74.62%) + 5,823,125,655 instructions:u # 2.91 insn per cycle + # 0.00 stalled cycles per insn (75.02%) + 0.760836257 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1295) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.242144e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.308218e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.308218e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.580800 sec -INFO: No Floating Point Exceptions have been reported - 1,655,777,920 cycles # 2.827 GHz - 3,287,720,584 instructions # 1.99 insn per cycle - 0.586391271 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1436) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.015881e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.401925e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.401925e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.562949 sec +INFO: No Floating Point Exceptions have been reported + 1,411,540,720 cycles:u # 2.439 GHz (74.64%) + 8,118,887 stalled-cycles-frontend:u # 0.58% frontend cycles idle (75.12%) + 13,907,036 stalled-cycles-backend:u # 0.99% backend cycles idle (75.13%) + 3,244,629,671 instructions:u # 2.30 insn per cycle + # 0.00 stalled cycles per insn (75.13%) + 0.583279083 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1418) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.289151e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.391409e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.391409e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 
3.293564e-05 ) GeV^0 -TOTAL : 0.570490 sec -INFO: No Floating Point Exceptions have been reported - 1,622,799,576 cycles # 2.819 GHz - 3,260,934,090 instructions # 2.01 insn per cycle - 0.576408659 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1328) (512y: 96) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.147175e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.094895e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.094895e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.601115 sec -INFO: No Floating Point Exceptions have been reported - 1,376,859,663 cycles # 2.272 GHz - 2,409,979,343 instructions # 1.75 insn per cycle - 0.607114374 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 547) (512y: 60) (512z: 1007) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index d290e84a6a..f6758d0f37 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-09-18_19:27:51 -DATE: 2024-09-18_13:33:34 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.032821e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.078089e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.480611e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 -TOTAL : 0.485441 sec -INFO: No Floating Point Exceptions have been reported - 2,051,454,700 cycles # 2.873 GHz - 2,936,249,934 instructions # 1.43 insn per cycle - 0.771058253 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 
1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 97 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 3.769393e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.430907e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.483413e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.485983e-01 +- 3.276854e-05 ) GeV^0 +TOTAL : 0.524863 sec +INFO: No Floating Point Exceptions have been reported + 1,237,747,185 cycles:u # 2.253 GHz (75.00%) + 3,461,109 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.64%) + 17,464,996 stalled-cycles-backend:u # 1.41% backend cycles idle (75.10%) + 1,400,351,263 instructions:u # 1.13 insn per cycle + # 0.01 stalled cycles per insn (74.71%) + 0.590966926 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.477195e-01 -Avg ME (F77/GPU) = 0.14771956735057756 -Relative difference = 4.559355911674916e-07 +Avg ME (F77/GPU) = 0.14771957969060168 +Relative difference = 5.394724574150425e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.100530e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.045913e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.045913e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 1.244009 sec -INFO: No Floating Point Exceptions have been reported - 3,662,603,595 cycles # 2.932 GHz - 9,601,734,780 instructions # 2.62 insn per cycle - 1.249887433 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.214791e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.389631e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.389631e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283184e-05 ) GeV^0 +TOTAL : 1.030266 sec +INFO: No Floating Point Exceptions have been reported + 2,979,702,508 cycles:u # 2.865 GHz (74.72%) + 6,889,472 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.69%) + 5,380,573 stalled-cycles-backend:u # 0.18% backend cycles idle (74.65%) + 9,455,387,314 instructions:u # 3.17 insn per cycle + # 0.00 stalled cycles per insn (75.02%) + 1.044658171 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 432) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.260293e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.450195e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.450195e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 0.554122 sec -INFO: No Floating Point Exceptions have been reported - 1,637,956,120 cycles # 2.928 GHz - 3,967,181,530 instructions # 2.42 insn per cycle - 0.560033790 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1579) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.120124e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.665555e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.665555e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283183e-05 ) GeV^0 +TOTAL : 0.506645 sec +INFO: No Floating Point Exceptions have been reported + 1,377,549,413 cycles:u # 2.666 GHz (74.94%) + 6,765,579 stalled-cycles-frontend:u # 0.49% frontend cycles idle (75.25%) + 18,960,448 stalled-cycles-backend:u # 1.38% backend cycles idle (75.25%) + 3,820,223,249 instructions:u # 2.77 insn per cycle + # 0.00 stalled cycles per insn (75.24%) + 0.521032641 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1513) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955861942843 -Relative difference = 2.80129187869649e-07 +Avg ME (F77/C++) = 0.14771955448668450 +Relative difference = 3.081061382869002e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.018941e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.312758e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.312758e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.436114 sec -INFO: No Floating Point Exceptions have been reported - 1,253,193,980 cycles # 2.841 GHz - 2,497,513,333 instructions # 1.99 insn per cycle - 0.441707702 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1924) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.107571e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.074889e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.074889e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283177e-05 ) GeV^0 +TOTAL : 0.425809 sec +INFO: No Floating Point Exceptions have been reported + 1,107,075,808 cycles:u # 2.540 GHz (74.47%) + 5,577,417 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.49%) + 9,015,200 stalled-cycles-backend:u # 0.81% backend cycles idle (74.47%) + 2,411,169,385 instructions:u # 2.18 insn per cycle + # 0.00 stalled cycles per insn (75.37%) + 0.440226538 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1876) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955698961392 -Relative difference = 2.9116235141448046e-07 +Avg ME (F77/C++) = 0.14771955128526315 +Relative difference = 3.2977842382139064e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.105058e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.563425e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.563425e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.426869 sec -INFO: No Floating Point Exceptions have been reported - 1,223,516,570 cycles # 2.834 GHz - 2,473,072,662 instructions # 2.02 insn per cycle - 0.432489185 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1870) (512y: 1) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955698961392 -Relative difference = 2.9116235141448046e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 
256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.875374e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.829234e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.829234e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 -TOTAL : 0.455920 sec -INFO: No Floating Point Exceptions have been reported - 1,079,442,551 cycles # 2.341 GHz - 2,072,975,829 instructions # 1.92 insn per cycle - 0.461745309 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1011) (512y: 5) (512z: 1292) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955262403935 -Relative difference = 3.207154680524219e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index 12dbe0a7bb..62b65a6b6c 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-09-18_19:27:58 -DATE: 2024-09-18_13:33:46 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.057555e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.155700e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.563343e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 -TOTAL : 0.485884 sec -INFO: No Floating Point Exceptions have been reported - 2,043,628,192 cycles # 2.869 GHz - 2,916,801,925 instructions # 1.43 insn per cycle - 0.771023658 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 
1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 86 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 7.521701e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.724716e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.778140e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.485983e-01 +- 3.276854e-05 ) GeV^0 +TOTAL : 0.358513 sec +INFO: No Floating Point Exceptions have been reported + 813,170,961 cycles:u # 2.136 GHz (72.50%) + 2,461,471 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.24%) + 5,856,414 stalled-cycles-backend:u # 0.72% backend cycles idle (74.75%) + 1,420,371,307 instructions:u # 1.75 insn per cycle + # 0.00 stalled cycles per insn (75.73%) + 0.417668225 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.477195e-01 -Avg ME (F77/GPU) = 0.14771956525510177 -Relative difference = 4.4175008557828484e-07 +Avg ME (F77/GPU) = 0.14771957969060168 +Relative difference = 5.394724574150425e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.191416e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.056446e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.056446e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 1.229162 sec -INFO: No Floating Point Exceptions have been reported - 3,623,698,938 cycles # 2.936 GHz - 9,471,242,034 instructions # 2.61 insn per cycle - 1.234707648 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 367) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.219672e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.397516e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.397516e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283184e-05 ) GeV^0 +TOTAL : 1.029273 sec +INFO: No Floating Point Exceptions have been reported + 2,974,795,216 cycles:u # 2.862 GHz (74.68%) + 6,707,863 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.67%) + 9,343,346 stalled-cycles-backend:u # 0.31% backend cycles idle (74.78%) + 9,323,790,456 instructions:u # 3.13 insn per cycle + # 0.00 stalled cycles per insn (75.16%) + 1.043606611 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 337) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] 
[hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.264406e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.455240e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.455240e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 0.551418 sec -INFO: No Floating Point Exceptions have been reported - 1,633,608,321 cycles # 2.938 GHz - 3,933,410,721 instructions # 2.41 insn per cycle - 0.556738925 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1517) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.126940e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.665561e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.665561e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283183e-05 ) GeV^0 +TOTAL : 0.503144 sec +INFO: No Floating Point Exceptions have been reported + 1,374,855,013 cycles:u # 2.680 GHz (74.86%) + 7,264,560 stalled-cycles-frontend:u # 0.53% frontend cycles idle (75.06%) + 8,850,995 stalled-cycles-backend:u # 0.64% backend cycles idle (75.06%) + 3,796,152,939 instructions:u # 2.76 insn per cycle + # 0.00 stalled cycles per insn (75.06%) + 0.517448809 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1479) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955861942843 -Relative difference = 2.80129187869649e-07 +Avg ME (F77/C++) = 0.14771955448668450 +Relative difference = 3.081061382869002e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.014495e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.293948e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.293948e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.435710 sec -INFO: No Floating Point Exceptions have been reported - 1,251,845,572 cycles # 2.841 GHz - 2,481,653,408 instructions # 1.98 insn per cycle - 0.441241697 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1817) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.106438e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.074111e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.074111e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283177e-05 ) GeV^0 +TOTAL : 0.425530 sec +INFO: No Floating Point Exceptions have been reported + 1,103,180,205 cycles:u # 2.533 GHz (74.31%) + 6,001,306 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.45%) + 37,751,830 stalled-cycles-backend:u # 3.42% backend cycles idle (74.46%) + 2,427,372,520 instructions:u # 2.20 insn per cycle + # 0.02 stalled cycles per insn (74.66%) + 0.439801504 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1802) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955698961392 -Relative difference = 2.9116235141448046e-07 +Avg ME (F77/C++) = 0.14771955128526315 +Relative difference = 3.2977842382139064e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.125464e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.603160e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.603160e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.424252 sec -INFO: No Floating Point Exceptions have been reported - 1,222,912,229 cycles # 2.849 GHz - 2,456,305,937 instructions # 2.01 insn per cycle - 0.429917564 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1773) (512y: 1) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955698961392 -Relative difference = 2.9116235141448046e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 
256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.934438e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.010088e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.010088e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 -TOTAL : 0.446349 sec -INFO: No Floating Point Exceptions have been reported - 1,072,792,214 cycles # 2.378 GHz - 2,057,138,403 instructions # 1.92 insn per cycle - 0.451920157 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 906) (512y: 5) (512z: 1273) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955262403935 -Relative difference = 3.207154680524219e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 8f7e2917bf..12c3ce295c 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-09-18_19:28:04 -DATE: 2024-09-18_13:33:57 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.090014e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.319571e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.751667e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.522308 sec -INFO: No Floating Point Exceptions have been reported - 2,179,170,623 cycles # 2.882 GHz - 3,109,984,327 instructions # 1.43 insn per cycle - 0.814646692 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 
1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.307714e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.169984e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.218366e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 +TOTAL : 0.385223 sec +INFO: No Floating Point Exceptions have been reported + 806,181,073 cycles:u # 1.952 GHz (74.18%) + 2,359,397 stalled-cycles-frontend:u # 0.29% frontend cycles idle (72.98%) + 9,657,871 stalled-cycles-backend:u # 1.20% backend cycles idle (75.31%) + 1,435,597,807 instructions:u # 1.78 insn per cycle + # 0.01 stalled cycles per insn (77.22%) + 0.450161349 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 Relative difference = 2.5810037581511336e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.952529e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.022459e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.022459e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.284601 sec -INFO: No Floating Point Exceptions have been reported - 3,782,838,045 cycles # 2.933 GHz - 9,753,328,321 instructions # 2.58 insn per cycle - 1.290389924 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.032746e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.158591e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.158591e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 1.232523 sec +INFO: No Floating Point Exceptions have been reported + 3,470,920,803 cycles:u # 2.779 GHz (74.96%) + 8,564,896 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.02%) + 5,964,520 stalled-cycles-backend:u # 0.17% backend cycles idle (75.02%) + 9,634,206,038 instructions:u # 2.78 insn per cycle + # 0.00 stalled cycles per insn (75.08%) + 1.253147920 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 332) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.563360e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.027715e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.027715e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.783750 sec -INFO: No Floating Point Exceptions have been reported - 2,313,452,686 cycles # 2.933 GHz - 5,920,736,181 instructions # 2.56 insn per cycle - 0.789531453 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1412) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.997756e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.578768e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.578768e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.743253 sec +INFO: No Floating Point Exceptions have been reported + 1,959,017,066 cycles:u # 2.579 GHz (74.82%) + 7,999,445 stalled-cycles-frontend:u # 0.41% frontend cycles idle (74.82%) + 12,794,708 stalled-cycles-backend:u # 0.65% backend cycles idle (74.73%) + 5,899,724,502 instructions:u # 3.01 insn per cycle + # 0.00 stalled cycles per insn (74.73%) + 0.763954796 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1383) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.274544e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.372577e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.372577e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.574756 sec -INFO: No Floating Point Exceptions have been reported - 1,639,105,587 cycles # 2.827 GHz - 3,253,580,218 instructions # 1.98 insn per cycle - 0.580508158 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1567) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.086028e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.556292e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.556292e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.556429 sec +INFO: No Floating Point Exceptions have been reported + 1,370,947,632 cycles:u # 2.393 GHz (75.07%) + 7,972,174 stalled-cycles-frontend:u # 0.58% frontend cycles idle (74.87%) + 14,848,905 stalled-cycles-backend:u # 1.08% backend cycles idle (74.99%) + 3,233,666,236 instructions:u # 2.36 insn per cycle + # 0.00 stalled cycles per insn (74.99%) + 0.577262695 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1546) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 +Avg ME (F77/C++) = 0.14771956675526976 +Relative difference = 2.2505293980258705e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.338032e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.481810e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.481810e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.561846 sec -INFO: No Floating Point Exceptions have been reported - 1,602,124,528 cycles # 2.826 GHz - 3,209,983,521 instructions # 2.00 insn per cycle - 0.567621873 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1446) (512y: 101) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 
2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.198566e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.176156e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.176156e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.590094 sec -INFO: No Floating Point Exceptions have been reported - 1,347,708,343 cycles # 2.265 GHz - 2,376,834,038 instructions # 1.76 insn per cycle - 0.595752442 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 768) (512y: 64) (512z: 1063) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index 856901d743..c45e743959 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-09-18_19:28:10 -DATE: 2024-09-18_13:34:09 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.212511e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.510212e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.023434e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.523683 sec -INFO: No Floating Point Exceptions have been reported - 2,161,000,888 cycles # 2.849 GHz - 3,093,780,518 instructions # 1.43 insn per cycle - 0.816657446 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 
1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.804264e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.167273e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.215068e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 +TOTAL : 0.412741 sec +INFO: No Floating Point Exceptions have been reported + 824,415,251 cycles:u # 1.980 GHz (75.53%) + 2,468,925 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.32%) + 8,503,324 stalled-cycles-backend:u # 1.03% backend cycles idle (76.14%) + 1,419,988,872 instructions:u # 1.72 insn per cycle + # 0.01 stalled cycles per insn (76.40%) + 0.481430048 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 Relative difference = 2.5810037581511336e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.006386e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.027076e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.027076e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.275858 sec -INFO: No Floating Point Exceptions have been reported - 3,759,691,883 cycles # 2.936 GHz - 9,643,680,583 instructions # 2.57 insn per cycle - 1.281474685 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.028761e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.154688e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.154688e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 1.237143 sec +INFO: No Floating Point Exceptions have been reported + 3,501,274,647 cycles:u # 2.792 GHz (74.66%) + 8,331,258 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.93%) + 10,458,091 stalled-cycles-backend:u # 0.30% backend cycles idle (75.13%) + 9,540,303,436 instructions:u # 2.72 insn per cycle + # 0.00 stalled cycles per insn (75.13%) + 1.258419856 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 343) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.517196e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.947819e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.947819e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.804148 sec -INFO: No Floating Point Exceptions have been reported - 2,322,905,849 cycles # 2.871 GHz - 5,850,527,655 instructions # 2.52 insn per cycle - 0.809789330 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1371) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.005056e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.585052e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.585052e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.740844 sec +INFO: No Floating Point Exceptions have been reported + 1,964,814,058 cycles:u # 2.594 GHz (74.69%) + 8,089,559 stalled-cycles-frontend:u # 0.41% frontend cycles idle (74.69%) + 13,788,218 stalled-cycles-backend:u # 0.70% backend cycles idle (74.72%) + 5,865,914,908 instructions:u # 2.99 insn per cycle + # 0.00 stalled cycles per insn (74.82%) + 0.761884120 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1353) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.254780e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.333242e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.333242e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.577840 sec -INFO: No Floating Point Exceptions have been reported - 1,650,198,876 cycles # 2.831 GHz - 3,216,570,367 instructions # 1.95 insn per cycle - 0.583563842 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1483) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.078459e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.540950e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.540950e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.558827 sec +INFO: No Floating Point Exceptions have been reported + 1,380,738,015 cycles:u # 2.399 GHz (75.02%) + 8,097,320 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.99%) + 23,334,783 stalled-cycles-backend:u # 1.69% backend cycles idle (74.99%) + 3,206,215,655 instructions:u # 2.32 insn per cycle + # 0.01 stalled cycles per insn (75.11%) + 0.580226895 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1487) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 +Avg ME (F77/C++) = 0.14771956675526976 +Relative difference = 2.2505293980258705e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.314025e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.454653e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.454653e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.565918 sec -INFO: No Floating Point Exceptions have been reported - 1,600,538,363 cycles # 2.803 GHz - 3,181,550,003 instructions # 1.99 insn per cycle - 0.571587963 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1382) (512y: 101) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 
2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.185908e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.142994e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.142994e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.592175 sec -INFO: No Floating Point Exceptions have been reported - 1,356,716,498 cycles # 2.272 GHz - 2,361,264,569 instructions # 1.74 insn per cycle - 0.597815792 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 716) (512y: 64) (512z: 1056) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 99516e3f65..03f48ac4eb 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_19:26:18 -DATE: 2024-09-18_13:30:47 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.206537e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.286021e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.966943e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.539704 sec -INFO: No Floating Point Exceptions have been reported - 2,208,534,555 cycles # 2.845 GHz - 3,150,536,398 instructions # 1.43 insn per cycle - 0.835623159 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.972005e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.492614e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.513911e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 +TOTAL : 0.456275 sec +INFO: No Floating Point Exceptions have been reported + 948,437,393 cycles:u # 2.054 GHz (73.91%) + 2,505,859 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.62%) + 7,088,010 stalled-cycles-backend:u # 0.75% backend cycles idle (76.35%) + 1,517,377,285 instructions:u # 1.60 insn per cycle + # 0.00 stalled cycles per insn (76.19%) + 0.650445358 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 -Avg ME (F77/GPU) = 2.0158358666195562 -Relative difference = 6.616631711254798e-08 +Avg ME (F77/GPU) = 2.0158358666195553 +Relative difference = 6.616631755314852e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.822612e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.869779e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.869779e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.861835 sec -INFO: No Floating Point Exceptions have been reported - 17,248,615,219 cycles # 2.940 GHz - 45,920,744,006 instructions # 2.66 insn per cycle - 5.867505238 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.218247e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.268669e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.268669e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 4.937872 sec +INFO: No Floating Point Exceptions have been reported + 14,977,288,401 cycles:u # 3.026 GHz (74.97%) + 10,353,628 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.93%) + 3,049,842,670 stalled-cycles-backend:u # 20.36% backend cycles idle (74.96%) + 45,597,437,018 instructions:u # 3.04 insn per cycle + # 0.07 stalled cycles per insn (74.95%) + 5.024039428 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 663) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194407 -Relative difference = 6.616637439061751e-08 +Avg ME (F77/C++) = 2.0158358666194411 +Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.157644e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.314617e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.314617e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.430104 sec -INFO: No Floating Point Exceptions have been reported - 10,035,725,674 cycles # 2.922 GHz - 27,802,903,324 instructions # 2.77 insn per cycle - 3.435933108 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2537) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.845460e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.016940e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.016940e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 2.935896 sec +INFO: No Floating Point Exceptions have been reported + 8,794,033,219 cycles:u # 2.985 GHz (75.05%) + 8,968,587 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.02%) + 2,713,571,872 stalled-cycles-backend:u # 30.86% backend cycles idle (75.03%) + 27,707,029,536 instructions:u # 3.15 insn per cycle + # 0.10 stalled cycles per insn (75.04%) + 3.044904707 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2458) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.941289e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.318652e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.318652e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.231098 sec -INFO: No Floating Point Exceptions have been reported - 6,101,804,369 cycles # 2.729 GHz - 12,586,990,350 instructions # 2.06 insn per cycle - 2.237005738 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2620) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.459220e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.926351e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.926351e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 1.835055 sec +INFO: No Floating Point Exceptions have been reported + 5,333,169,296 cycles:u # 2.893 GHz (74.99%) + 8,715,004 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.89%) + 560,049,655 stalled-cycles-backend:u # 10.50% backend cycles idle (74.90%) + 12,436,304,398 instructions:u # 2.33 insn per cycle + # 0.05 stalled cycles per insn (74.84%) + 1.955858148 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2492) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.519324e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.987161e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.987161e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 
2.007245 sec -INFO: No Floating Point Exceptions have been reported - 5,563,695,868 cycles # 2.765 GHz - 12,000,166,171 instructions # 2.16 insn per cycle - 2.013040788 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2365) (512y: 144) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.502694e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.684435e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.684435e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.101349 sec -INFO: No Floating Point Exceptions have been reported - 5,749,698,258 cycles # 1.851 GHz - 8,343,640,860 instructions # 1.45 insn per cycle - 3.107135736 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1468) (512y: 122) (512z: 1806) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index 1f4bfaf624..5c1904fff2 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_19:26:32 -DATE: 2024-09-18_13:31:12 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.340722e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.356922e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.992900e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.532286 sec -INFO: No Floating Point Exceptions have been reported - 2,205,060,845 cycles # 2.868 GHz - 3,167,717,935 instructions # 1.44 insn per cycle - 0.825884785 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.962969e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.474341e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.495413e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 +TOTAL : 0.463107 sec +INFO: No Floating Point Exceptions have been reported + 940,768,689 cycles:u # 2.050 GHz (76.06%) + 2,496,325 stalled-cycles-frontend:u # 0.27% frontend cycles idle (76.11%) + 5,742,181 stalled-cycles-backend:u # 0.61% backend cycles idle (74.90%) + 1,618,691,327 instructions:u # 1.72 insn per cycle + # 0.00 stalled cycles per insn (73.75%) + 0.637262581 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 -Avg ME (F77/GPU) = 2.0158358666195562 -Relative difference = 6.616631711254798e-08 +Avg ME (F77/GPU) = 2.0158358666195553 +Relative difference = 6.616631755314852e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.873402e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.922894e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.922894e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.705020 sec -INFO: No Floating Point Exceptions have been reported - 16,751,892,515 cycles # 2.934 GHz - 44,906,929,991 instructions # 2.68 insn per cycle - 5.710885629 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 566) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.367031e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.426766e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.426766e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 4.635931 sec +INFO: No Floating Point Exceptions have been reported + 14,052,570,920 cycles:u # 3.021 GHz (75.02%) + 8,949,168 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) + 2,267,268,134 stalled-cycles-backend:u # 16.13% backend cycles idle (74.90%) + 44,497,614,973 instructions:u # 3.17 insn per cycle + # 0.05 stalled cycles per insn (74.95%) + 4.735362364 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 592) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.361567e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.536177e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.536177e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.227440 sec -INFO: No Floating Point Exceptions have been reported - 9,512,762,540 cycles # 2.943 GHz - 26,678,539,109 instructions # 2.80 insn per cycle - 3.233163450 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2326) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.086943e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.280498e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.280498e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 2.789988 sec +INFO: No Floating Point Exceptions have been reported + 8,296,693,720 cycles:u # 2.976 GHz (75.04%) + 9,485,272 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.04%) + 1,459,624,763 stalled-cycles-backend:u # 17.59% backend cycles idle (75.05%) + 26,682,455,448 instructions:u # 3.22 insn per cycle + # 0.05 stalled cycles per insn (75.07%) + 2.873886609 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2278) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.604596e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.927835e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.927835e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.385363 sec -INFO: No Floating Point Exceptions have been reported - 6,599,025,301 cycles # 2.760 GHz - 14,108,971,201 instructions # 2.14 insn per cycle - 2.391489598 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2705) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.808830e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.184155e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.184155e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 2.011572 sec +INFO: No Floating Point Exceptions have been reported + 5,961,619,484 cycles:u # 2.942 GHz (74.77%) + 9,990,396 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.94%) + 1,230,778,826 stalled-cycles-backend:u # 20.65% backend cycles idle (75.14%) + 14,121,190,623 instructions:u # 2.37 insn per cycle + # 0.09 stalled cycles per insn (75.14%) + 2.114162326 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2700) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.791684e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.138771e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.138771e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 
2.294784 sec -INFO: No Floating Point Exceptions have been reported - 6,350,789,081 cycles # 2.762 GHz - 13,712,967,214 instructions # 2.16 insn per cycle - 2.300513281 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2356) (512y: 298) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.371675e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.540530e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.540530e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.217988 sec -INFO: No Floating Point Exceptions have been reported - 5,939,821,646 cycles # 1.843 GHz - 10,101,817,070 instructions # 1.70 insn per cycle - 3.223668588 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1318) (512y: 208) (512z: 1986) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 1a672b74ce..f7b829d7ca 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_19:26:47 -DATE: 2024-09-18_13:31:37 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.264093e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.766977e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.882650e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 -TOTAL : 0.489615 sec -INFO: No Floating Point Exceptions have been reported - 2,060,695,462 cycles # 2.874 GHz - 2,961,708,283 instructions # 1.44 insn per cycle - 0.774445109 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 125 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.980538e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.098565e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.120913e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.064391e+00 +- 3.343192e-03 ) GeV^0 +TOTAL : 0.379471 sec +INFO: No Floating Point Exceptions have been reported + 829,738,360 cycles:u # 2.119 GHz (74.46%) + 2,361,328 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.50%) + 12,584,293 stalled-cycles-backend:u # 1.52% backend cycles idle (75.50%) + 1,463,399,939 instructions:u # 1.76 insn per cycle + # 0.01 stalled cycles per insn (76.48%) + 0.572907708 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.015841e+00 -Avg ME (F77/GPU) = 2.0158787037944421 -Relative difference = 1.870375413642407e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.015844e+00 +Avg ME (F77/GPU) = 2.0158466693246737 +Relative difference = 1.3241722443517625e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.937524e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.992418e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.992418e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 -TOTAL : 5.498876 sec -INFO: No Floating Point Exceptions have been reported - 16,211,815,789 cycles # 2.946 GHz - 45,319,917,505 instructions # 2.80 insn per cycle - 5.504546294 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.463914e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.528816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.528816e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 +TOTAL : 4.436058 sec +INFO: No Floating Point Exceptions have been reported + 13,514,597,589 cycles:u # 3.049 GHz (74.84%) + 8,749,737 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.84%) + 2,777,186,527 stalled-cycles-backend:u # 20.55% backend cycles idle (75.00%) + 45,506,266,055 instructions:u # 3.37 insn per cycle + # 0.06 stalled cycles per insn (75.09%) + 4.529851721 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 667) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158491701586172 -Relative difference = 8.441039850630506e-08 +Avg ME (F77/C++) = 2.0158491450129077 +Relative difference = 7.193639399772436e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.533229e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.869354e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.869354e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 -TOTAL : 2.401545 sec -INFO: No Floating Point Exceptions have been reported - 7,056,760,375 cycles # 2.932 GHz - 17,791,878,594 instructions # 2.52 insn per cycle - 2.407391534 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3147) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.562972e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.910480e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.910480e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 +TOTAL : 2.067754 sec +INFO: No Floating Point Exceptions have been reported + 6,153,571,441 cycles:u # 2.994 GHz (74.92%) + 6,628,536 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.80%) + 2,561,584,960 stalled-cycles-backend:u # 41.63% backend cycles idle (74.91%) + 17,076,572,379 instructions:u # 2.78 insn per cycle + # 0.15 stalled cycles per insn (75.10%) + 2.144863506 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2902) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158486895961687 -Relative difference = 1.539816876576819e-07 +Avg ME (F77/C++) = 2.0158492142800242 +Relative difference = 1.0629765641719438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.087610e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.152694e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.152694e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.383748 sec -INFO: No Floating Point Exceptions have been reported - 3,839,977,803 cycles # 2.765 GHz - 8,262,037,377 instructions # 2.15 insn per cycle - 1.389311013 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3371) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.054455e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.180793e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.180793e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.065802e+00 +- 3.352030e-03 ) GeV^0 +TOTAL : 1.177516 sec +INFO: No Floating Point Exceptions have been reported + 3,381,876,587 cycles:u # 2.889 GHz (74.79%) + 7,165,420 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.76%) + 828,613,511 stalled-cycles-backend:u # 24.50% backend cycles idle (75.05%) + 8,048,609,709 instructions:u # 2.38 insn per cycle + # 0.10 stalled cycles per insn (75.39%) + 1.295862118 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3258) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015847e+00 -Avg ME (F77/C++) = 2.0158474864438176 -Relative difference = 2.4130988992271984e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015848e+00 +Avg ME (F77/C++) = 2.0158479403471574 +Relative difference = 2.9591934841076347e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.847495e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.011837e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.011837e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.271772 sec -INFO: No Floating Point Exceptions have been reported - 3,548,498,858 cycles # 2.779 GHz - 7,914,474,526 instructions # 2.23 insn per cycle - 1.277559305 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3214) (512y: 20) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015847e+00 -Avg ME (F77/C++) = 2.0158474864438176 -Relative difference = 2.4130988992271984e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.536546e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.195032e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.195032e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.690671 sec -INFO: No Floating Point Exceptions have been reported - 3,256,995,213 cycles # 1.921 GHz - 6,100,882,884 instructions # 1.87 insn per cycle - 1.696260075 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2258) (512y: 22) (512z: 2156) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015848e+00 -Avg ME (F77/C++) = 2.0158476348733529 -Relative difference = 1.8112806478434436e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index d3b2f0408f..87536c8e20 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_19:26:59 -DATE: 2024-09-18_13:31:58 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.208288e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.783701e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.898530e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 -TOTAL : 0.487345 sec -INFO: No Floating Point Exceptions have been reported - 2,044,938,895 cycles # 2.858 GHz - 2,894,501,323 instructions # 1.42 insn per cycle - 0.773252899 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 6.115283e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.129315e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.151880e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.064391e+00 +- 3.343192e-03 ) GeV^0 +TOTAL : 0.386185 sec +INFO: No Floating Point Exceptions have been reported + 803,193,561 cycles:u # 2.058 GHz (75.44%) + 2,329,368 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.20%) + 12,768,156 stalled-cycles-backend:u # 1.59% backend cycles idle (75.32%) + 1,467,124,445 instructions:u # 1.83 insn per cycle + # 0.01 stalled cycles per insn (75.46%) + 0.605071110 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.015841e+00 -Avg ME (F77/GPU) = 2.0158787037944421 -Relative difference = 1.870375413642407e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.015844e+00 +Avg ME (F77/GPU) = 2.0158466693246737 +Relative difference = 1.3241722443517625e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.963294e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.019360e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.019360e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 -TOTAL : 5.426510 sec -INFO: No Floating Point Exceptions have been reported - 15,955,926,327 cycles # 2.938 GHz - 44,427,771,107 instructions # 2.78 insn per cycle - 5.431874949 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 533) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.684271e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.761449e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.761449e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 +TOTAL : 4.072075 sec +INFO: No Floating Point Exceptions have been reported + 12,446,264,441 cycles:u # 3.050 GHz (74.92%) + 7,172,343 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.93%) + 1,595,583,580 stalled-cycles-backend:u # 12.82% backend cycles idle (74.93%) + 44,294,289,533 instructions:u # 3.56 insn per cycle + # 0.04 stalled cycles per insn (75.01%) + 4.105681463 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 571) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158491701586172 -Relative difference = 8.441039850630506e-08 +Avg ME (F77/C++) = 2.0158491450129077 +Relative difference = 7.193639399772436e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.335493e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.807156e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.807156e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 -TOTAL : 2.051564 sec -INFO: No Floating Point Exceptions have been reported - 6,058,187,563 cycles # 2.946 GHz - 17,074,725,200 instructions # 2.82 insn per cycle - 2.057140058 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2862) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.085417e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.509200e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.509200e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 +TOTAL : 1.889702 sec +INFO: No Floating Point Exceptions have been reported + 5,642,254,450 cycles:u # 2.971 GHz (74.99%) + 6,365,297 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.15%) + 1,763,423,250 stalled-cycles-backend:u # 31.25% backend cycles idle (75.15%) + 16,907,851,890 instructions:u # 3.00 insn per cycle + # 0.10 stalled cycles per insn (75.15%) + 1.964939519 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2752) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158486895961687 -Relative difference = 1.539816876576819e-07 +Avg ME (F77/C++) = 2.0158492142800242 +Relative difference = 1.0629765641719438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.066914e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.644109e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.644109e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.813330 sec -INFO: No Floating Point Exceptions have been reported - 5,026,891,048 cycles # 2.765 GHz - 10,223,175,449 instructions # 2.03 insn per cycle - 1.818918027 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3906) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.869022e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.552438e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.552438e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065802e+00 +- 3.352030e-03 ) GeV^0 +TOTAL : 1.498616 sec +INFO: No Floating Point Exceptions have been reported + 4,448,501,705 cycles:u # 2.949 GHz (74.88%) + 7,315,338 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.84%) + 1,680,475,755 stalled-cycles-backend:u # 37.78% backend cycles idle (74.98%) + 10,195,091,217 instructions:u # 2.29 insn per cycle + # 0.16 stalled cycles per insn (75.08%) + 1.541118768 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3884) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015847e+00 -Avg ME (F77/C++) = 2.0158474864438176 -Relative difference = 2.4130988992271984e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015848e+00 +Avg ME (F77/C++) = 2.0158479403471574 +Relative difference = 2.9591934841076347e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.155601e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.742490e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.742490e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.789620 sec -INFO: No Floating Point Exceptions have been reported - 4,970,225,584 cycles # 2.770 GHz - 9,994,978,881 instructions # 2.01 insn per cycle - 1.795236203 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3805) (512y: 2) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015847e+00 -Avg ME (F77/C++) = 2.0158474864438176 -Relative difference = 2.4130988992271984e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.666448e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.992729e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.992729e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 2.333237 sec -INFO: No Floating Point Exceptions have been reported - 4,367,486,322 cycles # 1.868 GHz - 8,444,271,998 instructions # 1.93 insn per cycle - 2.338821094 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2744) (512y: 4) (512z: 2754) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015848e+00 -Avg ME (F77/C++) = 2.0158476348733529 -Relative difference = 1.8112806478434436e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index c1f4bb8132..39a1b0d89f 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_19:27:11 -DATE: 2024-09-18_13:32:20 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.373966e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.408476e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.005223e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.533114 sec -INFO: No Floating Point Exceptions have been reported - 2,212,396,057 cycles # 2.876 GHz - 3,189,695,931 instructions # 1.44 insn per cycle - 0.826417249 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.934496e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.433672e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.453957e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 +TOTAL : 0.423359 sec +INFO: No Floating Point Exceptions have been reported + 984,376,123 cycles:u # 2.183 GHz (74.98%) + 2,542,046 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.21%) + 5,049,320 stalled-cycles-backend:u # 0.51% backend cycles idle (75.63%) + 1,505,653,920 instructions:u # 1.53 insn per cycle + # 0.00 stalled cycles per insn (75.61%) + 0.485761756 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358639104246 Relative difference = 6.751024171044779e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK 
+Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.812942e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.859362e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.859362e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.891473 sec -INFO: No Floating Point Exceptions have been reported - 17,373,992,128 cycles # 2.947 GHz - 46,072,043,013 instructions # 2.65 insn per cycle - 5.897196721 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.275227e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.330911e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.330911e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 4.810598 sec +INFO: No Floating Point Exceptions have been reported + 14,609,690,701 cycles:u # 3.028 GHz (74.90%) + 8,816,517 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.90%) + 2,840,211,842 stalled-cycles-backend:u # 19.44% backend cycles idle (74.97%) + 45,713,684,667 instructions:u # 3.13 insn per cycle + # 0.06 stalled cycles per insn (75.03%) + 4.829850527 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 673) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.226094e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.386425e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.386425e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.358561 sec -INFO: No Floating Point Exceptions have been reported - 9,911,091,884 cycles # 2.947 GHz - 27,587,758,232 instructions # 2.78 insn per cycle - 3.364358964 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.828976e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.997993e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.997993e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 2.944861 sec +INFO: No Floating Point Exceptions have been reported + 8,818,778,594 cycles:u # 2.980 GHz (74.92%) + 9,374,426 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.86%) + 2,768,129,590 stalled-cycles-backend:u # 31.39% backend cycles idle (74.86%) + 27,575,612,310 instructions:u # 3.13 insn per cycle + # 0.10 stalled cycles per insn (74.98%) + 2.963974282 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2518) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.044961e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.439076e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.439076e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.186755 sec -INFO: No Floating Point Exceptions have been reported - 6,022,763,481 cycles # 2.748 GHz - 12,488,130,017 instructions # 2.07 insn per cycle - 2.192467039 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2776) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.059239e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.581260e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.581260e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 1.947799 sec +INFO: No Floating Point Exceptions have been reported + 5,716,686,193 cycles:u # 2.913 GHz (74.74%) + 8,513,700 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.90%) + 1,359,585,869 stalled-cycles-backend:u # 23.78% backend cycles idle (75.10%) + 12,236,791,945 instructions:u # 2.14 insn per cycle + # 0.11 stalled cycles per insn (75.14%) + 1.966967622 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2671) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 +Avg ME (F77/C++) = 2.0158359151896224 +Relative difference = 4.20720623263505e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.596506e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.079685e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.079685e+05 ) 
sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 1.980331 sec -INFO: No Floating Point Exceptions have been reported - 5,504,974,873 cycles # 2.773 GHz - 11,923,154,801 instructions # 2.17 insn per cycle - 1.986372291 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2521) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.610025e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.802161e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.802161e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.013333 sec -INFO: No Floating Point Exceptions have been reported - 5,617,715,088 cycles # 1.861 GHz - 8,110,898,143 instructions # 1.44 insn per cycle - 3.019371634 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1865) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index 744bfec9d4..ede8c1d2c7 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-09-18_19:27:25 -DATE: 2024-09-18_13:32:45 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.356227e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.388949e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.002637e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.530171 sec -INFO: No Floating Point Exceptions have been reported - 2,205,062,942 cycles # 2.875 GHz - 3,154,626,469 instructions # 1.43 insn per cycle - 0.823696592 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 2.000593e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.522978e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.544792e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 +TOTAL : 0.427450 sec +INFO: No Floating Point Exceptions have been reported + 965,106,560 cycles:u # 2.118 GHz (75.70%) + 2,488,206 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.42%) + 10,996,365 stalled-cycles-backend:u # 1.14% backend cycles idle (75.48%) + 1,577,569,884 instructions:u # 1.63 insn per cycle + # 0.01 stalled cycles per insn (74.55%) + 0.489792643 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358639104246 Relative difference = 6.751024171044779e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK 
+Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.861428e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.909561e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.909561e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.739728 sec -INFO: No Floating Point Exceptions have been reported - 16,938,834,117 cycles # 2.949 GHz - 45,091,140,717 instructions # 2.66 insn per cycle - 5.745446347 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.338951e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.397942e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.397942e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 4.686230 sec +INFO: No Floating Point Exceptions have been reported + 14,307,115,528 cycles:u # 3.044 GHz (74.99%) + 8,712,906 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.99%) + 780,057,400 stalled-cycles-backend:u # 5.45% backend cycles idle (75.00%) + 44,583,603,957 instructions:u # 3.12 insn per cycle + # 0.02 stalled cycles per insn (74.99%) + 4.705408961 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.325491e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.496074e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.496074e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.260792 sec -INFO: No Floating Point Exceptions have been reported - 9,505,160,256 cycles # 2.910 GHz - 26,249,919,899 instructions # 2.76 insn per cycle - 3.266614954 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.124854e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.322609e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.322609e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 2.748390 sec +INFO: No Floating Point Exceptions have been reported + 8,264,613,718 cycles:u # 2.991 GHz (74.84%) + 9,568,431 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.83%) + 1,106,363,678 stalled-cycles-backend:u # 13.39% backend cycles idle (74.98%) + 26,285,216,045 instructions:u # 3.18 insn per cycle + # 0.04 stalled cycles per insn (75.10%) + 2.767313701 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2311) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.459875e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.763541e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.763541e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.458814 sec -INFO: No Floating Point Exceptions have been reported - 6,750,977,111 cycles # 2.740 GHz - 14,029,286,718 instructions # 2.08 insn per cycle - 2.464538527 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.733806e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.099178e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.099178e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 2.034839 sec +INFO: No Floating Point Exceptions have been reported + 5,999,141,474 cycles:u # 2.927 GHz (75.02%) + 8,863,145 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.02%) + 1,779,427,123 stalled-cycles-backend:u # 29.66% backend cycles idle (75.02%) + 13,978,554,557 instructions:u # 2.33 insn per cycle + # 0.13 stalled cycles per insn (75.06%) + 2.053844830 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2870) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 +Avg ME (F77/C++) = 2.0158359151896224 +Relative difference = 4.20720623263505e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.781257e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.129375e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.129375e+05 ) 
sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.300139 sec -INFO: No Floating Point Exceptions have been reported - 6,382,631,497 cycles # 2.769 GHz - 13,515,067,929 instructions # 2.12 insn per cycle - 2.305941749 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2531) (512y: 302) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.602901e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.797238e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.797238e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.017121 sec -INFO: No Floating Point Exceptions have been reported - 5,589,518,345 cycles # 1.850 GHz - 9,206,594,679 instructions # 1.65 insn per cycle - 3.022936699 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2059) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED From a45dcb552eb7c3817332acdce65f7311d4fa4a25 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 18 Sep 2024 20:32:06 +0300 Subject: [PATCH 60/76] [amd] in gq_ttq.mad and CODEGEN cudacpp.mk add optional debug flags for rocgdb on HIP (to debug the memory fault #806) --- .../madgraph/iolibs/template_files/gpu/cudacpp.mk | 7 +++++-- epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index a610dc3ea8..4f3ea91be1 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -174,7 +174,7 @@ 
ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) From 4416181baf472109df228dfa7b17c65a4eae296a Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Wed, 18 Sep 2024 20:00:36 +0200 Subject: [PATCH 61/76] [amd] regenerate all processes (just with some comments in cudacpp.mk) --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 12 +++++----- .../ee_mumu.mad/SubProcesses/cudacpp.mk | 7 ++++-- .../CODEGEN_cudacpp_ee_mumu_log.txt | 10 
++++----- .../ee_mumu.sa/SubProcesses/cudacpp.mk | 7 ++++-- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 12 +++++----- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 7 ++++-- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 ++++----- .../cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk | 7 ++++-- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 16 +++++++------- .../gg_tt01g.mad/SubProcesses/cudacpp.mk | 7 ++++-- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 ++++++++--------- .../gg_ttg.mad/SubProcesses/cudacpp.mk | 7 ++++-- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 10 ++++----- .../cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk | 7 ++++-- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 16 +++++++------- .../gg_ttgg.mad/SubProcesses/cudacpp.mk | 7 ++++-- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 ++++++------ .../gg_ttgg.sa/SubProcesses/cudacpp.mk | 7 ++++-- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 18 +++++++-------- .../gg_ttggg.mad/SubProcesses/cudacpp.mk | 7 ++++-- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 16 +++++++------- .../gg_ttggg.sa/SubProcesses/cudacpp.mk | 7 ++++-- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 18 +++++++-------- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 14 ++++++------ .../cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk | 7 ++++-- .../CODEGEN_mad_heft_gg_bb_log.txt | 10 ++++----- .../heft_gg_bb.mad/SubProcesses/cudacpp.mk | 7 ++++-- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 8 +++---- .../heft_gg_bb.sa/SubProcesses/cudacpp.mk | 7 ++++-- .../CODEGEN_mad_nobm_pp_ttW_log.txt | 22 +++++++++---------- .../nobm_pp_ttW.mad/SubProcesses/cudacpp.mk | 7 ++++-- .../CODEGEN_mad_pp_tt012j_log.txt | 20 ++++++++--------- .../pp_tt012j.mad/SubProcesses/cudacpp.mk | 7 ++++-- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 18 +++++++-------- .../smeft_gg_tttt.mad/SubProcesses/cudacpp.mk | 7 ++++-- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 14 ++++++------ .../smeft_gg_tttt.sa/SubProcesses/cudacpp.mk | 7 ++++-- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 10 ++++----- 
.../susy_gg_t1t1.mad/SubProcesses/cudacpp.mk | 7 ++++-- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 8 +++---- .../susy_gg_t1t1.sa/SubProcesses/cudacpp.mk | 7 ++++-- .../CODEGEN_mad_susy_gg_tt_log.txt | 14 ++++++------ .../susy_gg_tt.mad/SubProcesses/cudacpp.mk | 7 ++++-- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 10 ++++----- .../susy_gg_tt.sa/SubProcesses/cudacpp.mk | 7 ++++-- 45 files changed, 270 insertions(+), 204 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 6010e696c6..e9017c49df 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -57,7 +57,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005605936050415039  +DEBUG: model prefixing takes 0.005692958831787109  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -182,7 +182,7 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.071 s +Wrote files for 8 helas calls in 0.072 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines @@ -234,10 +234,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.087s -user 0m1.815s -sys 0m0.264s -Code generation completed in 2 seconds +real 0m3.845s +user 0m1.829s +sys 0m0.251s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 7c6ab02bd5..608753e001 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -57,7 +57,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005640983581542969  +DEBUG: model prefixing takes 0.005699634552001953  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -149,7 +149,7 @@ INFO: Checking for minimal orders which gives processes. 
INFO: Please specify coupling orders to bypass this step. INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.005 s +1 processes with 2 diagrams generated in 0.004 s Total: 1 processes with 2 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.282 s +ALOHA: aloha creates 4 routines in 0.276 s FFV1 FFV1 FFV2 @@ -196,7 +196,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. quit -real 0m0.764s +real 0m0.775s user 0m0.619s -sys 0m0.055s +sys 0m0.043s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git 
a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 2551473a78..6ff78a3661 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005740642547607422  +DEBUG: model prefixing takes 0.0057220458984375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -183,12 +183,12 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.073 s +Wrote files for 10 helas calls in 0.074 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.152 s +ALOHA: aloha creates 2 routines in 0.149 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines @@ -228,9 +228,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.933s -user 0m1.680s -sys 0m0.251s +real 0m2.049s +user 0m1.643s +sys 0m0.271s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index c29b1e2433..16f7e1adfd 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005649089813232422  +DEBUG: model prefixing takes 0.005597114562988281  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -176,7 +176,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.147 s +ALOHA: aloha creates 2 routines in 0.146 s VVV1 FFV1 FFV1 @@ -191,7 +191,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. quit -real 0m0.547s -user 0m0.489s -sys 0m0.049s +real 0m0.897s +user 0m0.476s +sys 0m0.056s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 7c0904d06f..10026f0131 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ 
b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0056819915771484375  +DEBUG: model prefixing takes 0.005770444869995117  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -204,7 +204,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s -Wrote files for 46 helas calls in 0.191 s +Wrote files for 46 helas calls in 0.194 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -212,14 +212,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.337 s +ALOHA: aloha creates 5 routines in 0.332 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.318 s +ALOHA: aloha creates 10 routines in 0.319 s VVV1 VVV1 FFV1 @@ -267,10 +267,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.650s -user 0m2.337s -sys 0m0.308s -Code generation completed in 2 seconds +real 0m2.887s +user 0m2.344s +sys 0m0.296s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 18ad3844a6..76bd44c193 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005638837814331055  +DEBUG: model prefixing takes 0.005703926086425781  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.023 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -182,8 +182,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxg DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s -Wrote files for 36 helas calls in 0.126 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +Wrote files for 36 helas calls in 0.123 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -191,14 +191,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.337 s +ALOHA: aloha creates 5 routines in 0.332 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.321 s +ALOHA: aloha creates 10 routines in 0.319 s VVV1 VVV1 FFV1 @@ -239,10 +239,10 
@@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.513s -user 0m2.242s -sys 0m0.259s -Code generation completed in 3 seconds +real 0m3.320s +user 0m2.195s +sys 0m0.274s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 1c2396d45a..7fff8e07b7 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005800962448120117  +DEBUG: model prefixing takes 0.005605220794677734  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.023 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT @@ -199,7 +199,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. quit -real 0m0.804s -user 0m0.744s -sys 0m0.051s +real 0m0.887s +user 0m0.734s +sys 0m0.052s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt 
b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 77dd9d1a0d..b0e5cdabdf 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057544708251953125  +DEBUG: model prefixing takes 0.00579071044921875  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -182,8 +182,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxgg DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 
38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.443 s -Wrote files for 222 helas calls in 0.683 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.434 s +Wrote files for 222 helas calls in 0.680 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -191,14 +191,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.345 s +ALOHA: aloha creates 5 routines in 0.339 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.326 s +ALOHA: aloha creates 10 routines in 0.320 s VVV1 VVV1 FFV1 @@ -242,9 +242,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.912s -user 0m3.607s -sys 0m0.294s +real 0m3.940s +user 0m3.563s +sys 0m0.292s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 208ddd63e8..9ec88f76e4 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005644083023071289  +DEBUG: model prefixing takes 0.005756378173828125  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.163 s +1 processes with 123 diagrams generated in 0.165 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -172,14 +172,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.440 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.442 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.330 s +ALOHA: aloha creates 5 routines in 0.340 s VVV1 VVV1 FFV1 @@ -202,7 +202,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m1.489s -user 0m1.419s -sys 0m0.057s +real 0m1.576s +user 0m1.428s +sys 0m0.060s Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index a3493e03ce..f3d8babced 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -57,7 +57,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005598545074462891  +DEBUG: model prefixing takes 0.0056743621826171875  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.940 s +1 processes with 1240 diagrams generated in 1.949 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -184,8 +184,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxggg DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 
182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 
417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 
652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 
889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1547]  DEBUG: 
diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 
218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 
418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 
618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 
818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.735 s -Wrote files for 2281 helas calls in 18.815 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.727 s +Wrote files for 2281 helas calls in 18.905 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -193,14 +193,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.327 s +ALOHA: 
aloha creates 5 routines in 0.331 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.321 s +ALOHA: aloha creates 10 routines in 0.320 s VVV1 VVV1 FFV1 @@ -244,9 +244,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m33.389s -user 0m32.805s -sys 0m0.479s +real 0m33.441s +user 0m32.899s +sys 0m0.437s Code generation completed in 34 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index 4c13616b50..d61dcd4ef1 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -57,7 +57,7 @@ 
generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005697011947631836  +DEBUG: model prefixing takes 0.005732297897338867  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.926 s +1 processes with 1240 diagrams generated in 1.936 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -172,14 +172,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
-Generated helas calls for 1 subprocesses (1240 diagrams) in 6.807 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.740 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.362 s +ALOHA: aloha creates 5 routines in 0.358 s VVV1 VVV1 FFV1 @@ -202,7 +202,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. quit -real 0m13.431s -user 0m13.266s -sys 0m0.108s -Code generation completed in 13 seconds +real 0m13.472s +user 0m13.208s +sys 0m0.101s +Code generation completed in 14 seconds diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 
573fe7ee40..2a9ce74dda 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005885124206542969  +DEBUG: model prefixing takes 0.005671024322509766  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -165,7 +165,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.081 s +8 processes with 40 diagrams generated in 0.079 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -216,17 +216,17 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxux DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1548]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.170 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s +Wrote files for 32 helas calls in 0.166 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.154 s +ALOHA: aloha creates 2 routines in 0.151 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha 
creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.141 s +ALOHA: aloha creates 4 routines in 0.137 s FFV1 FFV1 FFV1 @@ -272,9 +272,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.271s -user 0m1.947s -sys 0m0.318s +real 0m2.831s +user 0m1.943s +sys 0m0.282s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index ae4eb0c582..527d0c838e 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005564212799072266  +DEBUG: model prefixing takes 0.005598783493041992  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -165,7 +165,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.081 s +8 processes with 40 diagrams generated in 0.080 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -208,7 +208,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.151 s +ALOHA: aloha creates 2 routines in 0.147 s FFV1 FFV1 FFV1 @@ -224,7 +224,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. quit -real 0m1.469s -user 0m0.619s -sys 0m0.058s -Code generation completed in 2 seconds +real 0m0.747s +user 0m0.588s +sys 0m0.059s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt 
b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index e8641c274f..7aaebdcb5e 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -162,13 +162,13 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.275 s +ALOHA: aloha creates 4 routines in 0.271 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.259 s +ALOHA: aloha creates 8 routines in 0.254 s VVS3 VVV1 FFV1 @@ -206,9 +206,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.195s -user 0m1.897s -sys 0m0.292s +real 0m2.179s +user 0m1.907s +sys 0m0.272s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + 
###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index 10bc8ea3f2..eeec277df5 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -150,7 +150,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.270 s +ALOHA: aloha creates 4 routines in 0.269 s VVS3 VVV1 FFV1 @@ -167,7 +167,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. 
quit -real 0m0.659s -user 0m0.618s -sys 0m0.032s +real 0m0.664s +user 0m0.584s +sys 0m0.059s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/heft_gg_bb.sa/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt index 0ea4d26630..64acb57091 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F import model sm-no_b_mass INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058252811431884766  +DEBUG: model prefixing takes 0.005632638931274414  INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ INFO: Process u~ d > t t~ w- added to mirror process d u~ > t t~ w- INFO: Process c~ s > t t~ w- added to mirror process s c~ > t t~ w- INFO: Process d~ u > t t~ w+ added to mirror process u d~ > t t~ w+ INFO: Process s~ c > t t~ w+ added to mirror process c s~ > t t~ w+ -4 processes with 8 diagrams generated in 0.111 s +4 processes with 8 diagrams generated in 0.110 s Total: 4 processes with 8 diagrams add process p p > t t~ w j @1 INFO: Checking for minimal orders which gives processes. @@ -222,7 +222,7 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~ INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g -12 processes with 144 diagrams generated in 0.665 s +12 processes with 144 diagrams generated in 0.657 s Total: 16 processes with 152 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -353,19 +353,19 @@ INFO: Finding symmetric diagrams for subprocess group dux_ttxwm DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548]  -Generated helas calls for 8 subprocesses (76 diagrams) in 0.207 s -Wrote files for 212 helas calls in 0.855 s +Generated helas calls for 8 subprocesses (76 diagrams) in 0.208 s +Wrote files for 212 helas calls in 0.885 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 3 routines in 
0.208 s +ALOHA: aloha creates 3 routines in 0.205 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 6 routines in 0.207 s +ALOHA: aloha creates 6 routines in 0.203 s FFV1 FFV1 FFV1 @@ -461,10 +461,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m4.799s -user 0m4.206s -sys 0m0.574s -Code generation completed in 5 seconds +real 0m5.225s +user 0m4.183s +sys 0m0.518s +Code generation completed in 6 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index e090fc68c6..a93cb1b567 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ 
b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -56,7 +56,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00563502311706543  +DEBUG: model prefixing takes 0.005572795867919922  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -207,7 +207,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.139 s +13 processes with 76 diagrams generated in 0.142 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -373,7 +373,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.865 s +65 processes with 1119 diagrams generated in 1.862 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -691,8 +691,8 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttx DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1548]  -Generated helas calls for 18 subprocesses (372 diagrams) in 1.314 s -Wrote files for 810 helas calls in 2.832 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.313 s +Wrote files for 810 helas calls in 2.875 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -700,14 +700,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.347 s +ALOHA: aloha creates 5 routines in 0.343 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.321 s +ALOHA: aloha creates 10 routines in 0.325 s VVV1 VVV1 FFV1 @@ -885,9 +885,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m10.708s -user 0m9.682s -sys 0m0.994s +real 0m10.733s +user 0m9.674s +sys 0m0.936s Code generation completed in 11 seconds ************************************************************ * * diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index 204ade8c71..10b6e52273 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -72,7 +72,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + 
ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.1429440975189209  +DEBUG: model prefixing takes 0.14131855964660645  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -87,7 +87,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.791 s +1 processes with 72 diagrams generated in 3.793 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -119,8 +119,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxttx DEBUG: 
len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (72 diagrams) in 0.198 s -Wrote files for 119 helas calls in 0.408 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.192 s +Wrote files for 119 helas calls in 0.397 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -128,14 +128,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.326 s +ALOHA: aloha creates 5 routines in 0.329 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha 
creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.339 s +ALOHA: aloha creates 10 routines in 0.342 s VVV5 VVV5 FFV1 @@ -176,9 +176,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m7.621s -user 0m7.014s -sys 0m0.313s +real 0m7.328s +user 0m6.983s +sys 0m0.308s Code generation completed in 8 seconds ************************************************************ * * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index 7bf06a0cbc..1c56450caa 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -72,7 +72,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to 
Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14331912994384766  +DEBUG: model prefixing takes 0.14032483100891113  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -87,7 +87,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 
@1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.821 s +1 processes with 72 diagrams generated in 3.762 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT @@ -109,7 +109,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.191 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.192 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines @@ -136,7 +136,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
quit -real 0m5.255s -user 0m5.156s -sys 0m0.069s -Code generation completed in 5 seconds +real 0m5.180s +user 0m5.084s +sys 0m0.064s +Code generation completed in 6 seconds diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/smeft_gg_tttt.sa/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 2949288e67..d85917df10 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.128 s +1 processes with 6 diagrams generated in 0.127 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -588,7 +588,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.188 s +ALOHA: aloha creates 3 routines in 0.195 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines @@ -630,9 +630,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.043s -user 0m2.728s -sys 0m0.310s +real 0m3.199s +user 0m2.720s +sys 0m0.313s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see 
https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index a659f844a2..a22798887c 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.127 s +1 processes with 6 diagrams generated in 0.126 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Load PLUGIN.CUDACPP_OUTPUT @@ -592,7 +592,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. 
quit -real 0m1.529s -user 0m1.309s -sys 0m0.053s +real 0m1.364s +user 0m1.286s +sys 0m0.065s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_t1t1.sa/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 79fe1a685b..c15c42381b 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.126 s +1 processes with 3 diagrams generated in 0.125 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT @@ -582,16 +582,16 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.079 s +Wrote files for 10 helas calls in 0.077 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.139 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.139 s +ALOHA: aloha creates 4 routines in 0.140 s VVV1 FFV1 FFV1 @@ -627,9 +627,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.036s -user 0m2.643s -sys 0m0.301s +real 0m3.173s +user 0m2.612s +sys 0m0.319s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index e18785a5e5..cc7498186d 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -549,7 +549,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.122 s +1 processes with 3 diagrams generated in 0.121 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT @@ -575,7 +575,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.141 s +ALOHA: aloha creates 2 routines in 0.139 s VVV1 FFV1 FFV1 @@ -590,7 +590,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. quit -real 0m1.376s -user 0m1.241s -sys 0m0.064s +real 0m1.303s +user 0m1.226s +sys 0m0.063s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk index 5ffb286fef..143147056e 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/susy_gg_tt.sa/SubProcesses/cudacpp.mk @@ -174,7 +174,7 @@ ifeq ($(BACKEND),cuda) GPULANGUAGE = cu GPUSUFFIX = cuda - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) # NVidia CUDA architecture flags @@ -235,9 +235,12 @@ else ifeq ($(BACKEND),hip) GPULANGUAGE = hip GPUSUFFIX = hip - # Basic compiler flags (optimization and includes) + # Optimization flags GPUFLAGS = $(foreach opt, $(OPTFLAGS), $(XCOMPILERFLAG) $(opt)) + # DEBUG FLAGS (for #806: see https://hackmd.io/@gmarkoma/lumi_finland) + ###GPUFLAGS += -ggdb # FOR DEBUGGING ONLY + # AMD HIP architecture 
flags GPUARCHFLAGS = --offload-arch=gfx90a GPUFLAGS += $(GPUARCHFLAGS) From 3cc028043433218c9ab466a8ae729dd1b89378c1 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 19 Sep 2024 11:36:27 +0300 Subject: [PATCH 62/76] [amd] rerun 30 tmad tests on LUMI against AMD GPUs - all as expected (heft fail #833, skip ggttggg #933, gqttq crash #806) STARTED AT Wed 18 Sep 2024 09:02:01 PM EEST (SM tests) ENDED(1) AT Wed 18 Sep 2024 11:40:09 PM EEST [Status=0] (BSM tests) ENDED(1) AT Wed 18 Sep 2024 11:48:33 PM EEST [Status=0] 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 12 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 12 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 12 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 16 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 12 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 12 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 12 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt 1 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt --- .../log_eemumu_mad_d_inl0_hrd0.txt | 442 +++++----------- .../log_eemumu_mad_f_inl0_hrd0.txt | 464 ++++++---------- 
.../log_eemumu_mad_m_inl0_hrd0.txt | 460 ++++++---------- .../log_ggtt_mad_d_inl0_hrd0.txt | 452 ++++++---------- .../log_ggtt_mad_f_inl0_hrd0.txt | 458 ++++++---------- .../log_ggtt_mad_m_inl0_hrd0.txt | 454 ++++++---------- .../log_ggttg_mad_d_inl0_hrd0.txt | 462 ++++++---------- .../log_ggttg_mad_f_inl0_hrd0.txt | 462 ++++++---------- .../log_ggttg_mad_m_inl0_hrd0.txt | 464 ++++++---------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 460 ++++++---------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 463 ++++++---------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 462 ++++++---------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 488 ++++------------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 492 ++++------------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 488 ++++------------- .../log_gqttq_mad_d_inl0_hrd0.txt | 500 +++++------------- .../log_gqttq_mad_f_inl0_hrd0.txt | 498 +++++------------ .../log_gqttq_mad_m_inl0_hrd0.txt | 496 +++++------------ .../log_heftggbb_mad_d_inl0_hrd0.txt | 462 ++++++---------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 98 ++-- .../log_heftggbb_mad_m_inl0_hrd0.txt | 472 ++++++----------- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 462 ++++++---------- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 462 ++++++---------- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 462 ++++++---------- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 446 +++++----------- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 456 ++++++---------- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 456 ++++++---------- .../log_susyggtt_mad_d_inl0_hrd0.txt | 456 ++++++---------- .../log_susyggtt_mad_f_inl0_hrd0.txt | 462 ++++++---------- .../log_susyggtt_mad_m_inl0_hrd0.txt | 458 ++++++---------- 30 files changed, 4242 insertions(+), 9375 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index a32be077f9..d492b5ffc7 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-18_13:40:30 +DATE: 2024-09-18_21:09:23 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7474s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7399s - [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.5561s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5500s + [COUNTERS] Fortran MEs ( 1 ) : 0.0062s for 8192 events => throughput is 1.33E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2197s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2120s - [COUNTERS] Fortran MEs ( 1 ) : 0.0076s for 8192 events => throughput is 1.07E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1514s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1453s + [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7224s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6476s - [COUNTERS] Fortran MEs ( 1 ) : 0.0748s for 81920 events => throughput is 1.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3922s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3318s + [COUNTERS] Fortran MEs ( 1 ) : 0.0604s for 81920 events => throughput is 1.36E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,9 +124,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,14 +134,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2211s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2136s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0071s for 8192 events => throughput is 1.15E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1621s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1557s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.31E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,9 +159,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -169,10 +169,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7334s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6611s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0720s for 81920 events => throughput is 1.14E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3923s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3308s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0614s for 81920 events => throughput is 1.33E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -183,14 +183,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.150298e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.392236e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.170213e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.408701e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,14 +214,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2169s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2122s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.87E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.2544s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2503s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.08E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,9 +239,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7002s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6558s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0441s for 81920 events => throughput is 1.86E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3730s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3340s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0388s for 81920 events => throughput is 2.11E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -263,14 +263,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.910014e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.211611e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.998657e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.233261e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,9 +284,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -294,14 +294,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2169s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2132s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.40E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1501s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1473s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 2.98E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,9 +319,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6805s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6468s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0335s for 81920 events => throughput is 2.45E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3615s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3344s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 81920 events => throughput is 3.03E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -343,96 +343,22 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.599120e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.235818e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.638604e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2150s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2114s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.46E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6827s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6499s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0325s for 81920 events => throughput is 2.52E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002) differ by less than 3E-14 (2.220446049250313e-16) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.340621e+06 ) sec^-1 -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.664038e+06 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.739981e+06 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2192s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2148s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 2.02E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4669s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4550s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0056s for 8192 events => throughput is 1.46E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0063s -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) 
Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.2432789448173971E-002) and hip (9.2432789448173971E-002) differ by less than 3E-14 (0.0) -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -479,149 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6880s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6476s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0401s for 81920 events => throughput is 2.05E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519892E-002) differ by less than 3E-14 (0.0) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.061679e+06 ) sec^-1 + [COUNTERS] PROGRAM TOTAL : 0.6451s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6280s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 81920 events => throughput is 7.72E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0065s -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.165474e+06 ) sec^-1 +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.6554s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6518s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s +OK! xsec from fortran (9.1711103909519892E-002) and hip (9.1711103909519892E-002) differ by less than 3E-14 (0.0) -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 1.0937s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0854s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 81920 events => throughput is 1.06E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103909519892E-002) differ by less than 3E-14 (0.0) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.180467e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.484588e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.444487e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.562941e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.131686e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.728023e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.605423e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.985276e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = 
SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.162553e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.731572e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.757987e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.884754e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.185886e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.728413e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.069069e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.564490e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index d760c23b34..960f3f0cd1 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-18_13:40:49 +DATE: 2024-09-18_21:09:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7432s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7356s - [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.5256s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5195s + [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2222s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2147s - [COUNTERS] Fortran MEs ( 1 ) : 0.0076s for 8192 events => throughput is 1.08E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1528s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1467s + [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7385s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6605s - [COUNTERS] Fortran MEs ( 1 ) : 0.0780s for 81920 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3857s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3269s + [COUNTERS] Fortran MEs ( 1 ) : 0.0588s for 81920 events => throughput is 1.39E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432777382586498E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432776035199060E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2258s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2183s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.13E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.1557s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1503s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 8192 events => throughput is 1.57E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432777382586498E-002) differ by less than 4E-4 (1.305336294610271e-07) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432776035199060E-002) differ by less than 4E-4 (1.4511057155885965e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711091925143637E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711090687154856E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7135s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6453s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0680s for 81920 events => throughput is 1.20E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3852s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3333s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0518s for 81920 events => throughput is 1.58E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711091925143637E-002) differ by less than 4E-4 (1.3067530257870885e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711090687154856E-002) differ by less than 4E-4 (1.4417409099909406e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.221041e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.768544e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.228624e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.742150e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432774839452045E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432793908398633E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2147s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2117s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.95E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.1526s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1502s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.57E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774839452045E-002) differ by less than 4E-4 (1.5804696607002455e-07) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432793908398633E-002) differ by less than 4E-4 (4.8253706141920816e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711089416628339E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711108423277371E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6780s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0275s for 81920 events => throughput is 2.98E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3541s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3315s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0225s for 81920 events => throughput is 3.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089416628339E-002) differ by less than 4E-4 (1.5802766439865223e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711108423277371E-002) differ by less than 4E-4 (4.921713170347175e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.101743e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.922218e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.221746e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.998544e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432793820194981E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2192s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2165s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.25E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.1531s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1509s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.98E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774915924193E-002) differ by less than 4E-4 (1.5721963908532643e-07) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432793820194981E-002) differ by less than 4E-4 (4.729945990433748e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711108407854763E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6719s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6467s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0250s for 81920 events => throughput is 3.28E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3555s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3354s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0200s for 81920 events => throughput is 4.10E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002) differ by less than 4E-4 (1.5762502958427405e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711108407854763E-002) differ by less than 4E-4 (4.904896666602099e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.474277e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.584047e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2174s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2146s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.25E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774915924193E-002) differ by less than 4E-4 (1.5721963908532643e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6748s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6505s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0241s for 81920 events => throughput is 3.40E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002) differ by less than 4E-4 (1.5762502958427405e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.456987e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.393817e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.708350e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.548344e+06 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432778556608516E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2238s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2208s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.95E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432778556608516E-002) differ by less than 4E-4 (1.1783227071848756e-07) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711093118690828E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6837s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6578s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0257s for 81920 events => throughput is 3.19E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711093118690828E-002) differ by less than 4E-4 (1.1766109664357316e-07) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.378249e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.571882e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432780016531851E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432778459280288E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.6559s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6524s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.77E+06 events/s - [COUNTERS] 
CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.5712s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5600s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0058s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432780016531851E-002) differ by less than 4E-4 (1.0203783951112655e-07) +OK! xsec from fortran (9.2432789448173971E-002) and hip (9.2432778459280288E-002) differ by less than 4E-4 (1.1888523265835005e-07) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711094767039689E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711093172690286E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 1.0956s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0874s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 81920 events => throughput is 1.07E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.6557s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6412s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 81920 events => throughput is 9.68E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0060s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711094767039689E-002) differ by less than 4E-4 (9.968782199720749e-08) +OK! xsec from fortran (9.1711103909519892E-002) and hip (9.1711093172690286E-002) differ by less than 4E-4 (1.1707229707891287e-07) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.223914e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.584467e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.489581e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.552819e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.046619e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.782998e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.917172e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.597196e+07 ) sec^-1 *** EXECUTE 
GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.064257e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.758103e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.895930e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.537742e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.656385e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.157651e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.636564e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.529571e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 3678e8e364..6cb007d911 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - -make USEBUILDDIR=1 BACKEND=cuda - +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-18_13:41:08 +DATE: 2024-09-18_21:09:48 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7466s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7391s - [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.5285s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5224s + [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2178s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2101s - [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1532s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1471s + [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7260s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6506s - [COUNTERS] Fortran MEs ( 1 ) : 0.0754s for 81920 events => throughput is 1.09E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3961s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3355s + [COUNTERS] Fortran MEs ( 1 ) : 0.0605s for 81920 events => throughput is 1.35E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,9 +124,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,14 +134,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2203s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2127s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.13E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1552s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1490s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 8192 events => throughput is 1.32E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448308305564751e-11) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448297203334505e-11) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711103904317942E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7363s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6616s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0744s for 81920 events => throughput is 1.10E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3902s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3300s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0601s for 81920 events => throughput is 1.36E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002) differ by less than 2E-4 (5.6721183305796785e-11) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317942E-002) differ by less than 2E-4 (5.672107228349432e-11) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.133764e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.411192e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.147681e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.482269e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,14 +214,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2195s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2148s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.88E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1505s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1468s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.29E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448308305564751e-11) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448297203334505e-11) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711103904317942E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6934s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6496s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0435s for 81920 events => throughput is 1.88E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3719s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3339s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0379s for 81920 events => throughput is 2.16E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002) differ by less than 2E-4 (5.6721183305796785e-11) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317942E-002) differ by less than 2E-4 (5.672107228349432e-11) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.996644e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.275066e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.048925e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.326149e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432789444494401E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2177s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2140s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.44E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1522s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1494s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.96E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789444494401E-002) differ by less than 2E-4 (3.980804574865715e-11) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711103899063479E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6893s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6561s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 81920 events => throughput is 2.49E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.5429s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5088s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0339s for 81920 events => throughput is 2.42E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063479E-002) differ by less than 2E-4 (1.1401468658078784e-10) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.590922e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.665063e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2169s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2133s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.50E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6867s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6530s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0334s for 81920 events => throughput is 2.45E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.604540e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.197053e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.589532e+06 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2176s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2134s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6977s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6595s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0378s for 81920 events => throughput is 2.17E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.283314e+06 ) sec^-1 -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.201898e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.266955e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789437826970E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432789437826984E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.6541s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s - [COUNTERS] 
CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.4473s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4354s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 8192 events => throughput is 1.43E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789437826970E-002) differ by less than 2E-4 (1.1194101201539297e-10) +OK! xsec from fortran (9.2432789448173971E-002) and hip (9.2432789437826984E-002) differ by less than 2E-4 (1.1194067894848558e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103901050417E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 1.0910s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0820s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 81920 events => throughput is 9.79E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.6442s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6276s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0104s for 81920 events => throughput is 7.91E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103901050417E-002) differ by less than 2E-4 (9.234946141134515e-11) +OK! xsec from fortran (9.1711103909519892E-002) and hip (9.1711103901050417E-002) differ by less than 2E-4 (9.234946141134515e-11) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.081337e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.524793e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.286137e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.562476e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.251289e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.751283e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.774363e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.037405e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = 
SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.280291e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.765527e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.840047e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.896806e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.258897e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.709964e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.124478e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.586592e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 21d2f45edf..3f2d9bdd43 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-18_13:41:27 +DATE: 2024-09-18_21:10:00 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.8485s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8070s - [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7231s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6912s + [COUNTERS] Fortran MEs ( 1 ) : 0.0319s for 8192 events => throughput is 2.57E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4498s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4077s - [COUNTERS] Fortran MEs ( 1 ) : 0.0420s for 8192 events => throughput is 1.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3495s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3203s + [COUNTERS] Fortran MEs ( 1 ) : 0.0292s for 8192 events => throughput is 2.80E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.144596232268157] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9562s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5349s - [COUNTERS] Fortran MEs ( 1 ) : 0.4212s for 81920 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5007s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1830s + [COUNTERS] Fortran MEs ( 1 ) : 0.3177s for 81920 events => throughput is 2.58E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138611968034155] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4516s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4062s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0450s for 8192 events => throughput is 1.82E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3723s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3367s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0354s for 8192 events => throughput is 2.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034155) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232268150] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9866s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5393s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4468s for 81920 events => throughput is 1.83E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.5267s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1760s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3505s for 81920 events => throughput is 2.34E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.144596232268157) and cpp (47.144596232268150) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.851914e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.370663e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.872591e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.352510e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138611968034155] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4375s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4102s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0269s for 8192 events => throughput is 3.04E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3589s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3376s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 8192 events => throughput is 3.89E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034155) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232268164] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7905s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5386s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2515s for 81920 events => throughput is 3.26E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.4468s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2378s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2088s for 81920 events => throughput is 3.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.144596232268157) and cpp (47.144596232268164) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.305635e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.992808e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.371218e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.008058e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,9 +284,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4200s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4039s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0158s for 8192 events => throughput is 5.19E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3458s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3333s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.66E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232268178] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7022s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5439s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1579s for 81920 events => throughput is 5.19E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.2924s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1706s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1216s for 81920 events => throughput is 6.73E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.144596232268157) and cpp (47.144596232268178) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.172913e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.897510e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.264385e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.943522e+05 ) sec^-1 -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 - [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4192s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4041s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.59E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 - [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.6806s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5367s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1435s for 81920 events => throughput is 5.71E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.786323e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.850142e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611968034169] fbridge_mode=1 - [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4275s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4043s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.60E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034169) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 - [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7739s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5476s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2259s for 81920 events => throughput is 3.63E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.522776e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.620788e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,9 +370,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -534,20 +380,20 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034176] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.8511s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8471s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.57E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 0.6227s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.6088s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.37E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0080s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cuda (47.138611968034176) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.138611968034162) and hip (47.138611968034176) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,9 +405,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -569,59 +415,57 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268178] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9977s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9877s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 81920 events => throughput is 8.83E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 1.5269s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5020s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0177s for 81920 events => throughput is 4.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0072s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cuda (47.144596232268178) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (47.144596232268157) and hip (47.144596232268178) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.921444e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.391294e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.230318e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.401261e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.714613e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.774650e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.316499e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.525092e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.728492e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.778220e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.598150e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.225843e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.745533e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.784062e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.694862e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.866082e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 0850891597..01d41bcb36 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-18_13:41:56 +DATE: 2024-09-18_21:10:19 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.8368s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7947s - [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6353s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6036s + [COUNTERS] Fortran MEs ( 1 ) : 0.0317s for 8192 events => throughput is 2.58E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4509s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4076s - [COUNTERS] Fortran MEs ( 1 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3566s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3248s + [COUNTERS] Fortran MEs ( 1 ) : 0.0318s for 8192 events => throughput is 2.58E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.144596232268157] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9677s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5472s - [COUNTERS] Fortran MEs ( 1 ) : 0.4205s for 81920 events => throughput is 1.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5566s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2403s + [COUNTERS] Fortran MEs ( 1 ) : 0.3163s for 81920 events => throughput is 2.59E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138606099989779] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138605296829816] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4454s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4032s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3355s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3068s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138606099989779) differ by less than 4E-4 (1.2448487851646206e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138605296829816) differ by less than 4E-4 (1.4152313931869998e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144592707001024] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144592003933589] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9812s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5583s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4226s for 81920 events => throughput is 1.94E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.4310s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1286s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3022s for 81920 events => throughput is 2.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144592707001024) differ by less than 4E-4 (7.477563590541081e-08) +OK! xsec from fortran (47.144596232268157) and cpp (47.144592003933589) differ by less than 4E-4 (8.968863673963767e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.959611e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.847123e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.959548e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.968057e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138602111070696] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138602746994408] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4226s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4048s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0175s for 8192 events => throughput is 4.69E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3350s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3201s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0148s for 8192 events => throughput is 5.53E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138602111070696) differ by less than 4E-4 (2.091059336795098e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138602746994408) differ by less than 4E-4 (1.956154279669775e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144588828412729] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144589414828133] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7203s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5477s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1724s for 81920 events => throughput is 4.75E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.2639s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1226s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1411s for 81920 events => throughput is 5.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144588828412729) differ by less than 4E-4 (1.570456860111591e-07) +OK! xsec from fortran (47.144596232268157) and cpp (47.144589414828133) differ by less than 4E-4 (1.44607029572974e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.738872e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.663521e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.748017e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.691631e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138602995819163] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4166s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 8192 events => throughput is 9.16E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3277s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3201s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138602499179925) differ by less than 4E-4 (2.008725722424387e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138602995819163) differ by less than 4E-4 (1.9033685183522664e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144587555291501] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.6374s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5454s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0918s for 81920 events => throughput is 8.93E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.1791s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1108s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0682s for 81920 events => throughput is 1.20E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ by less than 4E-4 (1.9590636879396328e-07) +OK! xsec from fortran (47.144596232268157) and cpp (47.144587555291501) differ by less than 4E-4 (1.840502910077646e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.120680e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.193326e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 - [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4176s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4084s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 8192 events => throughput is 9.23E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.138611968034162) and cpp (47.138602499179925) differ by less than 4E-4 (2.008725722424387e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 - [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.6345s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5478s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0864s for 81920 events => throughput is 9.48E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ by less than 4E-4 (1.9590636879396328e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.788116e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.176918e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.789950e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138606840950104] fbridge_mode=1 - [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4189s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4068s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0118s for 8192 events => throughput is 6.95E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.138611968034162) and cpp (47.138606840950104) differ by less than 4E-4 (1.0876612277499476e-07) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.189981e+06 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144591429357156] fbridge_mode=1 - [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.6732s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5506s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1222s for 81920 events => throughput is 6.70E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144591429357156) differ by less than 4E-4 (1.0187617272006122e-07) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.765000e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.898629e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138612402172164] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138605197694872] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.8533s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8496s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.6623s + 
[COUNTERS] Fortran Overhead ( 0 ) : 0.6495s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0053s for 8192 events => throughput is 1.55E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0075s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cuda (47.138612402172164) differ by less than 4E-4 (9.209817353195149e-09) +OK! xsec from fortran (47.138611968034162) and hip (47.138605197694872) differ by less than 4E-4 (1.4362619105146024e-07) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596666727985] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144590142508306] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9917s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9825s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 81920 events => throughput is 9.59E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 1.3935s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3777s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 81920 events => throughput is 9.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0076s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cuda (47.144596666727985) differ by less than 4E-4 (9.215473939505614e-09) +OK! xsec from fortran (47.144596232268157) and hip (47.144590142508306) differ by less than 4E-4 (1.2917195901795964e-07) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.139565e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.708770e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.535095e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.800159e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.504949e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.141802e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.302031e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.784579e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.479596e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.174041e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.319419e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.634532e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.230042e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.761258e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.720607e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.329932e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 1cd7f5e3d4..462c7a33d0 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda - +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-18_13:42:23 +DATE: 2024-09-18_21:10:37 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.8437s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8020s - [COUNTERS] Fortran MEs ( 1 ) : 0.0417s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6487s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6170s + [COUNTERS] Fortran MEs ( 1 ) : 0.0317s for 8192 events => throughput is 2.58E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4457s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4043s - [COUNTERS] Fortran MEs ( 1 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3602s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3285s + [COUNTERS] Fortran MEs ( 1 ) : 0.0317s for 8192 events => throughput is 2.58E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.144596232268157] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9802s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5584s - [COUNTERS] Fortran MEs ( 1 ) : 0.4219s for 81920 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4921s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1747s + [COUNTERS] Fortran MEs ( 1 ) : 0.3175s for 81920 events => throughput is 2.58E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138613306947967] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138613306947953] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4569s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4109s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0456s for 8192 events => throughput is 1.79E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3635s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3276s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0357s for 8192 events => throughput is 2.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138613306947967) differ by less than 2E-4 (2.8403759566586473e-08) +OK! xsec from fortran (47.138611968034162) and cpp (47.138613306947953) differ by less than 2E-4 (2.8403759344541868e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,9 +159,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -169,28 +169,28 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597573367548] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 2.0058s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5525s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4529s for 81920 events => throughput is 1.81E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.5363s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1798s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3563s for 81920 events => throughput is 2.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144597573367548) differ by less than 2E-4 (2.8446512922997158e-08) +OK! xsec from fortran (47.144596232268157) and cpp (47.144597573367548) differ by less than 2E-4 (2.8446513367086368e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.833075e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.352222e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.846422e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.358203e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613306947953] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4330s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4080s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.33E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3490s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3279s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0209s for 8192 events => throughput is 3.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144597573367555] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144597573367527] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.8037s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5546s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2487s for 81920 events => throughput is 3.29E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.3859s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1768s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2089s for 81920 events => throughput is 3.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144597573367555) differ by less than 2E-4 (2.8446512922997158e-08) +OK! xsec from fortran (47.144596232268157) and cpp (47.144597573367527) differ by less than 2E-4 (2.8446512922997158e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.318121e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.933966e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.355189e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.951899e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138613336664328] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4204s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4041s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0159s for 8192 events => throughput is 5.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3792s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3670s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0121s for 8192 events => throughput is 6.80E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) +OK! xsec from fortran (47.138611968034162) and cpp (47.138613336664328) differ by less than 2E-4 (2.9034163517849265e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144597613828985] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7037s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5502s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1532s for 81920 events => throughput is 5.35E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.2982s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1786s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1195s for 81920 events => throughput is 6.86E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) +OK! xsec from fortran (47.144596232268157) and cpp (47.144597613828985) differ by less than 2E-4 (2.9304754622927476e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.280868e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.319511e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 - [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4169s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4022s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.71E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 - [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.6742s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5312s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1426s for 81920 events => throughput is 5.74E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.827488e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.054164e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.962674e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 - [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4277s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4052s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.71E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.114502e+05 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 - [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7709s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5477s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2228s for 81920 events => throughput is 3.68E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.662408e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.633047e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611963547788] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138611963547795] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.8496s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8458s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 0.6425s + 
[COUNTERS] Fortran Overhead ( 0 ) : 0.6292s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 8192 events => throughput is 1.32E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0072s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cuda (47.138611963547788) differ by less than 2E-4 (9.517409083059647e-11) +OK! xsec from fortran (47.138611968034162) and hip (47.138611963547795) differ by less than 2E-4 (9.517397980829401e-11) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232269095] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232269080] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9898s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9799s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 8.98E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 1.5279s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5024s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0177s for 81920 events => throughput is 4.63E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0078s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cuda (47.144596232269095) differ by less than 2E-4 (1.9317880628477724e-14) +OK! xsec from fortran (47.144596232268157) and hip (47.144596232269080) differ by less than 2E-4 (1.9539925233402755e-14) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.961867e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.389948e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.402195e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.430104e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.751023e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.792157e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.487612e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.485305e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.767038e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.790389e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.725223e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.203783e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.748403e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.775819e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.694986e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.875599e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 652edcf84f..c80769695f 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-18_13:42:52 +DATE: 2024-09-18_21:10:56 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.7493s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4200s - [COUNTERS] Fortran MEs ( 1 ) : 0.3293s for 8192 events => throughput is 2.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6228s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3900s + [COUNTERS] Fortran MEs ( 1 ) : 0.2327s for 8192 events => throughput is 3.52E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7150s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3834s - [COUNTERS] Fortran MEs ( 1 ) : 0.3316s for 8192 events => throughput is 2.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5388s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3055s + [COUNTERS] Fortran MEs ( 1 ) : 0.2333s for 8192 events => throughput is 3.51E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07997 [7.9971558171606449E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.1953s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8899s - [COUNTERS] Fortran MEs ( 1 ) : 3.3054s for 81920 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4842s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3122s + [COUNTERS] Fortran MEs ( 1 ) : 2.1720s for 81920 events => throughput is 3.77E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474251492720248E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7369s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3879s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3477s for 8192 events => throughput is 2.36E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [COUNTERS] PROGRAM TOTAL : 0.5432s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2880s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2546s for 8192 events => throughput is 3.22E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748553E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474251492720248E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558171606491E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.3596s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8952s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4633s for 81920 events => throughput is 2.37E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 4.2062s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3840s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8215s for 81920 events => throughput is 2.90E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558171606491E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.471888e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.988318e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.456119e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.012656e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474251492720248E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5680s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3869s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1804s for 8192 events => throughput is 4.54E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 0.4562s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3138s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1419s for 8192 events => throughput is 5.77E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748567E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474251492720248E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279650E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558171606491E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.6931s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8866s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8056s for 81920 events => throughput is 4.54E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 2.8213s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4027s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.4181s for 81920 events => throughput is 5.78E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279650E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558171606491E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.653702e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.842744e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.694721e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.866708e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4803s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3892s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0905s for 8192 events => throughput is 9.05E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.3845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3131s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0711s for 8192 events => throughput is 1.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748595E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474251492720207E-002) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,120 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558171606505E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.8119s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9071s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9041s for 81920 events => throughput is 9.06E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 2.1081s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3980s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7097s for 81920 events => throughput is 1.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558171606505E-002) differ by less than 3E-14 (6.661338147750939e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.368531e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.204129e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.254727e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4668s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3854s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0809s for 8192 events => throughput is 1.01E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.198782e+05 ) sec^-1 -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748595E-002) differ by less than 3E-14 (4.440892098500626e-16) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.6912s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8823s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8083s for 81920 events => throughput is 1.01E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.047750e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.045236e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,110 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748581E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474251492720248E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5013s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3866s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1140s for 8192 events => throughput is 7.19E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM 
TOTAL : 0.7001s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6719s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.90E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0143s -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748581E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.8474251492720207E-002) and hip (7.8474251492720248E-002) differ by less than 3E-14 (4.440892098500626e-16) -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.0360s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8971s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1383s for 81920 events => throughput is 7.20E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.160157e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.320569e+04 ) sec^-1 +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! 
Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.8352s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8227s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 8192 events => throughput is 8.68E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485809748553E-002) differ by less than 3E-14 (2.220446049250313e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279636E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558171606491E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.3543s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3252s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0261s for 81920 events => throughput is 3.14E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s + [COUNTERS] PROGRAM TOTAL : 1.8408s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7337s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0932s for 81920 events => throughput is 8.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0139s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656827279636E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.9971558171606449E-002) and hip (7.9971558171606491E-002) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.134986e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.013717e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.475726e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.115536e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.339604e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.642640e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.161734e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.831605e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 
11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.354476e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.514831e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.170951e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.707768e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.318892e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.631145e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.662470e+06 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.701309e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 3362abfbc9..d7948fe70d 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +make USEBUILDDIR=1 BACKEND=hip - -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-18_13:43:35 +DATE: 2024-09-18_21:11:27 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.7388s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4086s - [COUNTERS] Fortran MEs ( 1 ) : 0.3301s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5642s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3317s + [COUNTERS] Fortran MEs ( 1 ) : 0.2324s for 8192 events => throughput is 3.52E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7145s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3839s - [COUNTERS] Fortran MEs ( 1 ) : 0.3307s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5418s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3092s + [COUNTERS] Fortran MEs ( 1 ) : 0.2327s for 8192 events => throughput is 3.52E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07997 [7.9971558171606449E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.1779s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8749s - [COUNTERS] Fortran MEs ( 1 ) : 3.3030s for 81920 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.7189s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4075s + [COUNTERS] Fortran MEs ( 1 ) : 2.3113s for 81920 events => throughput is 3.54E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471473453718410E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474238393007253E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7223s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3896s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3317s for 8192 events => throughput is 2.47E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 0.5702s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3124s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2572s for 8192 events => throughput is 3.18E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471473453718410E-002) differ by less than 4E-4 (1.574588530672827e-07) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474238393007253E-002) differ by less than 4E-4 (1.6693007842683016e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971643267110940E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971543373778375E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.2162s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8886s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.3267s for 81920 events => throughput is 2.46E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 3.9659s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4061s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5591s for 81920 events => throughput is 3.20E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971643267110940E-002) differ by less than 4E-4 (1.69562182517069e-07) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971543373778375E-002) differ by less than 4E-4 (1.8503863641328167e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.539712e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.306143e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.530008e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.307654e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471459294758378E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474229018345096E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4889s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3868s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1015s for 8192 events => throughput is 8.07E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.3844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3052s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0788s for 8192 events => throughput is 1.04E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459294758378E-002) differ by less than 4E-4 (3.37893311330717e-07) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474229018345096E-002) differ by less than 4E-4 (2.8639171045785616e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971629726281482E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971534528332888E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.9019s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8838s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0176s for 81920 events => throughput is 8.05E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 2.2208s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4149s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8056s for 81920 events => throughput is 1.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629726281482E-002) differ by less than 4E-4 (3.38882539141494e-07) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971534528332888E-002) differ by less than 4E-4 (2.9564602843645815e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.124328e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.039215e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.199350e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.034006e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474228627553363E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4313s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3842s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0467s for 8192 events => throughput is 1.75E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3507s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3128s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0377s for 8192 events => throughput is 2.17E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459718665412E-002) differ by less than 4E-4 (3.324912595248364e-07) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474228627553363E-002) differ by less than 4E-4 (2.9137158252812156e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971533958864222E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.3507s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8840s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4663s for 81920 events => throughput is 1.76E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.7725s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4031s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3692s for 81920 events => throughput is 2.22E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002) differ by less than 4E-4 (3.447153443802975e-07) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971533958864222E-002) differ by less than 4E-4 (3.027669184252346e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.796536e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.786454e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4292s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3860s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459718665412E-002) differ by less than 4E-4 (3.324912595248364e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.3290s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9023s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4263s for 81920 events => throughput is 1.92E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002) differ by less than 4E-4 (3.447153443802975e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.981352e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.411160e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.997761e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471471932611128E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4486s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3925s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0556s for 8192 events => throughput is 1.47E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471471932611128E-002) differ by less than 4E-4 (1.768430569759616e-07) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.427122e+05 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971639934306102E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.4713s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9066s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5643s for 81920 events => throughput is 1.45E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971639934306102E-002) differ by less than 4E-4 (2.1123700788550082e-07) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.458815e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.487461e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471475012321185E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474239700037612E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.8337s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8291s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.31E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM 
TOTAL : 0.6449s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6236s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0134s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471475012321185E-002) differ by less than 4E-4 (1.375968260441951e-07) +OK! xsec from fortran (7.8474251492720207E-002) and hip (7.8474239700037612E-002) differ by less than 4E-4 (1.5027454702831733e-07) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971648932322295E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971544830799671E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.3421s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3278s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 81920 events => throughput is 6.21E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 1.7547s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7041s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0368s for 81920 events => throughput is 2.22E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0137s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971648932322295E-002) differ by less than 4E-4 (9.872194262072753e-08) +OK! xsec from fortran (7.9971558171606449E-002) and hip (7.9971544830799671E-002) differ by less than 4E-4 (1.6681939285501102e-07) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.709678e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.086951e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.936833e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.086184e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.247414e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.463630e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.199841e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.705248e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 
11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.195768e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.471416e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.278448e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.726540e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.108387e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.330755e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.230857e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.027556e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 4de53c2d38..bc0a9b927a 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg - -make USEBUILDDIR=1 BACKEND=cuda - +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-18_13:44:15 +DATE: 2024-09-18_21:11:56 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.7391s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4081s - [COUNTERS] Fortran MEs ( 1 ) : 0.3310s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5586s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3257s + [COUNTERS] Fortran MEs ( 1 ) : 0.2329s for 8192 events => throughput is 3.52E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7122s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3840s - [COUNTERS] Fortran MEs ( 1 ) : 0.3281s for 8192 events => throughput is 2.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5383s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3079s + [COUNTERS] Fortran MEs ( 1 ) : 0.2303s for 8192 events => throughput is 3.56E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07997 [7.9971558171606449E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.1945s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8920s - [COUNTERS] Fortran MEs ( 1 ) : 3.3025s for 81920 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.7124s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3979s + [COUNTERS] Fortran MEs ( 1 ) : 2.3145s for 81920 events => throughput is 3.54E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471486590207584E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474252272193679E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7363s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3850s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3501s for 8192 events => throughput is 2.34E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 0.5952s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3092s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2852s for 8192 events => throughput is 2.87E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486590207584E-002) differ by less than 2E-4 (9.945765766516956e-09) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474252272193679E-002) differ by less than 2E-4 (9.93285631523122e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971657589635384E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558933520065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.4502s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8950s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5541s for 81920 events => throughput is 2.30E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 4.2315s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4023s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8285s for 81920 events => throughput is 2.90E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589635384E-002) differ by less than 2E-4 (9.532824529756567e-09) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558933520065E-002) differ by less than 2E-4 (9.527307387457995e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.405336e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.939184e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.411690e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.931342e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471486540430027E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474252220105081E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5672s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1790s for 8192 events => throughput is 4.58E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 0.4546s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3105s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1437s for 8192 events => throughput is 5.70E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486540430027E-002) differ by less than 2E-4 (9.311426296676473e-09) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474252220105081E-002) differ by less than 2E-4 (9.269089717989232e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971657589963913E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558934000736E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.6943s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8846s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8089s for 81920 events => throughput is 4.53E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 2.8365s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4064s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.4297s for 81920 events => throughput is 5.73E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589963913E-002) differ by less than 2E-4 (9.536932576992285e-09) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558934000736E-002) differ by less than 2E-4 (9.53331791286871e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.686401e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.769522e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.704142e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.769710e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474252077403842E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4765s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3867s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0892s for 8192 events => throughput is 9.18E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.3764s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3056s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0705s for 8192 events => throughput is 1.16E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486395956899E-002) differ by less than 2E-4 (7.470335683379403e-09) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474252077403842E-002) differ by less than 2E-4 (7.450642991457812e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,120 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558777659491E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.7937s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8977s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8954s for 81920 events => throughput is 9.15E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 2.0941s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3918s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7020s for 81920 events => throughput is 1.17E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002) differ by less than 2E-4 (7.571829385710771e-09) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558777659491E-002) differ by less than 2E-4 (7.578357275050962e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.255440e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.199948e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.362786e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4681s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3893s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0782s for 8192 events => throughput is 1.05E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486395956899E-002) differ by less than 2E-4 (7.470335683379403e-09) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.306810e+05 ) sec^-1 -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.6895s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8933s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7956s for 81920 events => throughput is 1.03E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002) differ by less than 2E-4 (7.571829385710771e-09) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.053887e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.065947e+05 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471486537749241E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474251477062731E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5056s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1179s for 8192 events => throughput is 6.95E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.6386s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6104s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0142s -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486537749241E-002) differ by less than 2E-4 (9.277263846030337e-09) +OK! 
xsec from fortran (7.8474251492720207E-002) and hip (7.8474251477062731E-002) differ by less than 2E-4 (1.9952373087051e-10) -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -479,149 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971657565670345E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558174786780E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.0551s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8834s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1711s for 81920 events => throughput is 7.00E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN 
xsec *** - -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657565670345E-002) differ by less than 2E-4 (9.233155351395794e-09) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + [COUNTERS] PROGRAM TOTAL : 1.8413s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7334s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0936s for 81920 events => throughput is 8.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0144s -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.918098e+04 ) sec^-1 +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.013587e+04 ) sec^-1 +OK! xsec from fortran (7.9971558171606449E-002) and hip (7.9971558174786780E-002) differ by less than 2E-4 (3.976818874207311e-11) -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485791426987E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.8394s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8269s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 8192 events => throughput is 8.67E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485791426987E-002) differ by less than 2E-4 (2.334807902570901e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656830583548E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.3560s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3268s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0262s for 81920 events => throughput is 3.12E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656830583548E-002) differ by less than 2E-4 (4.131384123695625e-11) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.114701e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.091802e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.463889e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.071153e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.291446e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.640320e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.155947e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.832709e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 
11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.267147e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.638301e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.165743e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.710488e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.239896e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.630540e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.648131e+06 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.702853e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index da4192a0d3..6b9f2afff2 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone - +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-18_13:44:59 +DATE: 2024-09-18_21:12:28 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.6910s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3764s - [COUNTERS] Fortran MEs ( 1 ) : 4.3146s for 8192 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.1672s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3317s + [COUNTERS] Fortran MEs ( 1 ) : 2.8355s for 8192 events => throughput is 2.89E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.5801s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2959s - [COUNTERS] Fortran MEs ( 1 ) : 4.2843s for 8192 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.0939s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2445s + [COUNTERS] Fortran MEs ( 1 ) : 2.8494s for 8192 events => throughput is 2.87E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 + [XSECTION] Cross section = 0.2093 [0.20930270975283627] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 45.0141s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0696s - [COUNTERS] Fortran MEs ( 1 ) : 42.9445s for 81920 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 30.0793s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5366s + [COUNTERS] Fortran MEs ( 1 ) : 28.5427s for 81920 events => throughput is 2.87E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849706926843] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.7546s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3011s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.4446s for 8192 events => throughput is 1.84E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0090s + [COUNTERS] PROGRAM TOTAL : 3.8844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2797s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5973s for 8192 events => throughput is 2.28E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0074s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849706926843) differ by less than 3E-14 (8.881784197001252e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930270975283632] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 46.6278s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0620s - [COUNTERS] CudaCpp MEs ( 2 ) : 44.5568s for 81920 events => throughput is 1.84E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s + [COUNTERS] PROGRAM TOTAL : 37.7445s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5656s + [COUNTERS] CudaCpp MEs ( 2 ) : 36.1719s for 81920 events => throughput is 2.26E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0070s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930270975283632) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.897024e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.374831e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.894466e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.230830e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849706926832] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 2.6753s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2991s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3715s for 8192 events => throughput is 3.45E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0047s + [COUNTERS] PROGRAM TOTAL : 2.0045s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2685s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.7319s for 8192 events => throughput is 4.73E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0041s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849706926832) differ by less than 3E-14 (1.2212453270876722e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248325] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930270975283630] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 25.8378s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0749s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.7582s for 81920 events => throughput is 3.45E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s + [COUNTERS] PROGRAM TOTAL : 18.9916s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5393s + [COUNTERS] CudaCpp MEs ( 2 ) : 17.4487s for 81920 events => throughput is 4.69E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0036s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248325) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930270975283630) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.534769e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.829240e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.572800e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.836399e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849706926854] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.3401s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2956s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0420s for 8192 events => throughput is 7.86E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.0244s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2586s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7634s for 8192 events => throughput is 1.07E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849706926854) differ by less than 3E-14 (5.551115123125783e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930270975283624] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 12.4720s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0723s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.3972s for 81920 events => throughput is 7.88E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s + [COUNTERS] PROGRAM TOTAL : 9.5121s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5355s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.9749s for 81920 events => throughput is 1.03E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930270975283624) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.115646e+03 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.135568e+03 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 - [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.2200s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2987s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9191s for 8192 events => throughput is 8.91E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 - [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 11.2276s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0702s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.1552s for 81920 events => throughput is 8.95E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.261139e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.094613e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.284539e+03 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 - [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.4788s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2959s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1801s for 8192 events => throughput is 6.94E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.101634e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 - [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 13.8631s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0851s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.7752s for 81920 events => throughput is 6.96E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.044735e+03 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.087685e+03 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849706926843] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.8126s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7388s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0390s for 8192 events => throughput is 2.10E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s + [COUNTERS] PROGRAM 
TOTAL : 0.8926s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6814s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1119s for 8192 events => throughput is 7.32E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0993s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.33144849706926871) and hip (0.33144849706926843) differ by less than 3E-14 (8.881784197001252e-16) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248336] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930270975283644] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 2.9095s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5487s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3260s for 81920 events => throughput is 2.51E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s + [COUNTERS] PROGRAM TOTAL : 3.0388s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8723s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0668s for 81920 events => throughput is 7.68E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0997s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cuda (0.20930257969248336) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (0.20930270975283627) and hip (0.20930270975283644) differ by less than 3E-14 (8.881784197001252e-16) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.150288e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.452923e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.340464e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.410641e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.120076e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.693558e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.169270e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.121248e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.124208e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.696136e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.169177e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.679912e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.120876e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.692445e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.432039e+05 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.546236e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index d51442efc8..096b3f824b 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +make USEBUILDDIR=1 BACKEND=hip - - -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-18_13:48:49 +DATE: 2024-09-18_21:15:41 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.5851s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2964s - [COUNTERS] Fortran MEs ( 1 ) : 4.2887s for 8192 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.0802s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2419s + [COUNTERS] Fortran MEs ( 1 ) : 2.8384s for 8192 events => throughput is 2.89E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.5902s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2920s - [COUNTERS] Fortran MEs ( 1 ) : 4.2983s for 8192 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.0676s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2470s + [COUNTERS] Fortran MEs ( 1 ) : 2.8206s for 8192 events => throughput is 2.90E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 + [XSECTION] Cross section = 0.2093 [0.20930270975283627] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 45.0593s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0904s - [COUNTERS] Fortran MEs ( 1 ) : 42.9689s for 81920 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 30.1218s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5491s + [COUNTERS] Fortran MEs ( 1 ) : 28.5727s for 81920 events => throughput is 2.87E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144941544531159] fbridge_mode=1 + [XSECTION] Cross section = 0.3315 [0.33145004642682091] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.6331s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2951s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3295s for 8192 events => throughput is 1.89E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s + [COUNTERS] PROGRAM TOTAL : 3.7398s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2582s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4741s for 8192 events => throughput is 2.36E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0074s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144941544531159) differ by less than 4E-4 (4.675947774535061e-06) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33145004642682091) differ by less than 4E-4 (4.6745046844431926e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,39 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930329135137288] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930342252742398] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 45.4961s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0670s - [COUNTERS] CudaCpp MEs ( 2 ) : 43.4203s for 81920 events => throughput is 1.89E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0087s + [COUNTERS] PROGRAM TOTAL : 36.6310s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6637s + [COUNTERS] CudaCpp MEs ( 2 ) : 34.9569s for 81920 events => throughput is 2.34E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0105s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930329135137288) differ by less than 4E-4 (3.400143900211816e-06) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930342252742398) differ by less than 4E-4 (3.405472335016313e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.953905e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.440002e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.953638e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.451889e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -205,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144937378275385] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144996928807552] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.5948s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2947s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2975s for 8192 events => throughput is 6.31E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s + [COUNTERS] PROGRAM TOTAL : 1.1311s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2575s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8713s for 8192 events => throughput is 9.40E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144937378275385) differ by less than 4E-4 (4.550249099066761e-06) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144996928807552) differ by less than 4E-4 (4.441772461838411e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930324959819654] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930338466143997] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 14.0680s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0718s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9937s for 81920 events => throughput is 6.83E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s + [COUNTERS] PROGRAM TOTAL : 10.2003s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5319s + [COUNTERS] CudaCpp MEs ( 2 ) : 8.6665s for 81920 events => throughput is 9.45E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930324959819654) differ by less than 4E-4 (3.2006567445286294e-06) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930338466143997) differ by less than 4E-4 (3.2245574101974483e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.014424e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.681677e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.021993e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.754711e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 + [XSECTION] Cross section = 0.3315 [0.33145003508801812] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.8304s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3000s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5290s for 8192 events => throughput is 1.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6510s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2558s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3938s for 8192 events => throughput is 2.08E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144939353225550) differ by less than 4E-4 (4.609834643787281e-06) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33145003508801812) differ by less than 4E-4 (4.6402948361556895e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930341333868943] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 7.2994s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0696s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.2285s for 81920 events => throughput is 1.57E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s + [COUNTERS] PROGRAM TOTAL : 5.6819s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5913s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.0897s for 81920 events => throughput is 2.00E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ by less than 4E-4 (3.3244755468508913e-06) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930341333868943) differ by less than 4E-4 (3.361570683813042e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.599753e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.600977e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 - [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.7627s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2949s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4666s for 8192 events => throughput is 1.76E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144939353225550) differ by less than 4E-4 (4.609834643787281e-06) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1 - [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 6.8208s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0681s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.7514s for 81920 events => throughput is 1.72E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ by less than 4E-4 (3.3244755468508913e-06) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.808420e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.179139e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.816362e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.157595e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144947551388249] fbridge_mode=1 - [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.8859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2954s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5889s for 8192 events => throughput is 1.39E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144947551388249) differ by less than 4E-4 (4.857178601991308e-06) +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930331717025510] fbridge_mode=1 - [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 7.9475s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0831s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.8629s for 81920 events => throughput is 1.40E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930331717025510) differ by less than 4E-4 (3.523500632152121e-06) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.420878e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.411954e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -525,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144955535316123] fbridge_mode=1 + [XSECTION] Cross section = 0.3315 [0.33145003213125773] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.7891s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7375s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 8192 events => throughput is 3.03E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s + [COUNTERS] PROGRAM 
TOTAL : 0.9162s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7390s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0728s for 8192 events => throughput is 1.13E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1044s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cuda (0.33144955535316123) differ by less than 4E-4 (5.0980589545446264e-06) +OK! xsec from fortran (0.33144849706926871) and hip (0.33145003213125773) differ by less than 4E-4 (4.631374112662456e-06) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -560,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930336562619947] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930346912077236] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 2.8062s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5479s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2338s for 81920 events => throughput is 3.50E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s + [COUNTERS] PROGRAM TOTAL : 2.6538s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8663s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6818s for 81920 events => throughput is 1.20E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1057s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cuda (0.20930336562619947) differ by less than 4E-4 (3.755012085271403e-06) +OK! xsec from fortran (0.20930270975283627) and hip (0.20930346912077236) differ by less than 4E-4 (3.6280845909786308e-06) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.088372e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.150702e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.376508e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.789281e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.114154e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.112095e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.259362e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.070936e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.087087e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.122768e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.241028e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.698596e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.079549e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.056109e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.391392e+05 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.797688e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 4029a4bd08..5d6c5fa5e3 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-18_13:51:53 +DATE: 2024-09-18_21:18:52 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.5765s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2949s - [COUNTERS] Fortran MEs ( 1 ) : 4.2815s for 8192 events => throughput is 1.91E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.2993s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3148s + [COUNTERS] Fortran MEs ( 1 ) : 2.9845s for 8192 events => throughput is 2.74E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.6114s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2967s - [COUNTERS] Fortran MEs ( 1 ) : 4.3148s for 8192 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.0889s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2453s + [COUNTERS] Fortran MEs ( 1 ) : 2.8435s for 8192 events => throughput is 2.88E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 + [XSECTION] Cross section = 0.2093 [0.20930270975283627] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 45.1244s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0863s - [COUNTERS] Fortran MEs ( 1 ) : 43.0382s for 81920 events => throughput is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 30.1871s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5311s + [COUNTERS] Fortran MEs ( 1 ) : 28.6560s for 81920 events => throughput is 2.86E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786734542164] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849880304822] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.8194s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2986s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.5116s for 8192 events => throughput is 1.82E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0093s + [COUNTERS] PROGRAM TOTAL : 3.8463s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2466s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5926s for 8192 events => throughput is 2.28E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0071s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786734542164) differ by less than 2E-4 (5.228634192278037e-09) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849880304822) differ by less than 2E-4 (5.230916810816666e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930258048084049] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930271054111049] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 47.3150s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0784s - [COUNTERS] CudaCpp MEs ( 2 ) : 45.2275s for 81920 events => throughput is 1.81E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0091s + [COUNTERS] PROGRAM TOTAL : 37.5708s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5608s + [COUNTERS] CudaCpp MEs ( 2 ) : 36.0030s for 81920 events => throughput is 2.28E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0071s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258048084049) differ by less than 2E-4 (3.766591261111785e-09) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930271054111049) differ by less than 2E-4 (3.766192246956734e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.873631e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.356933e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.874665e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.377720e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786651655289] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849797290254] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 2.6800s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2994s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3756s for 8192 events => throughput is 3.45E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s + [COUNTERS] PROGRAM TOTAL : 1.9860s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2638s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.7191s for 8192 events => throughput is 4.77E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786651655289) differ by less than 2E-4 (2.7278828085286477e-09) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849797290254) differ by less than 2E-4 (2.7263173940639263e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930258019984904] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930271025983213] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 25.8989s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0743s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.8193s for 81920 events => throughput is 3.44E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0054s + [COUNTERS] PROGRAM TOTAL : 18.9773s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5499s + [COUNTERS] CudaCpp MEs ( 2 ) : 17.4239s for 81920 events => throughput is 4.70E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0035s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019984904) differ by less than 2E-4 (2.424078271445751e-09) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930271025983213) differ by less than 2E-4 (2.4223090200337083e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.519279e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.947460e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.530635e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.987211e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849773665513] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.3351s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2976s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0351s for 8192 events => throughput is 7.91E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s + [COUNTERS] PROGRAM TOTAL : 1.0155s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2483s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7655s for 8192 events => throughput is 1.07E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849773665513) differ by less than 2E-4 (2.013544886381169e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930271025898603] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 12.4644s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0722s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.3898s for 81920 events => throughput is 7.88E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s + [COUNTERS] PROGRAM TOTAL : 9.4472s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5509s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.8947s for 81920 events => throughput is 1.04E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930271025898603) differ by less than 2E-4 (2.418266698001048e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.009062e+03 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.120880e+03 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 - [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.1951s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2962s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8968s for 8192 events => throughput is 9.13E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 - [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 11.2498s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0961s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.1516s for 81920 events => throughput is 8.95E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.318473e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.080026e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.350512e+03 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 - [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.4981s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3004s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1949s for 8192 events => throughput is 6.86E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.101222e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 - [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 13.9738s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0733s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.8977s for 81920 events => throughput is 6.89E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.010058e+03 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.974396e+03 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786533876569] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849679653593] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.8114s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7381s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0386s for 8192 events => throughput is 2.12E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s + [COUNTERS] PROGRAM 
TOTAL : 0.7773s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5660s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1119s for 8192 events => throughput is 7.32E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0994s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786533876569) differ by less than 2E-4 (8.255786054789382e-10) +OK! xsec from fortran (0.33144849706926871) and hip (0.33144849679653593) differ by less than 2E-4 (8.228511205743416e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930258003933860] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930271009954451] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 2.9001s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5282s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3371s for 81920 events => throughput is 2.43E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s + [COUNTERS] PROGRAM TOTAL : 3.0159s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8455s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0702s for 81920 events => throughput is 7.65E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1001s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cuda (0.20930258003933860) differ by less than 2E-4 (1.6571959360334176e-09) +OK! xsec from fortran (0.20930270975283627) and hip (0.20930271009954451) differ by less than 2E-4 (1.6564918325912004e-09) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.156591e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.427960e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.143626e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.410829e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.122372e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.681706e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.161172e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.117500e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.154782e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.677894e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.164268e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.670389e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.117598e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.680352e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.420328e+05 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.532654e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index b5fe53dcd6..e4dcd810b0 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,41 +1,21 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - - -make USEBUILDDIR=1 BACKEND=cuda - - -make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 - -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + OMP_NUM_THREADS= -DATE: 2024-09-18_13:57:21 +DATE: 2024-09-18_21:22:43 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +29,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 100.9942s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5376s - [COUNTERS] Fortran MEs ( 1 ) : 100.4566s for 8192 events => throughput is 8.15E+01 events/s + [COUNTERS] PROGRAM TOTAL : 62.7332s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4667s + [COUNTERS] Fortran MEs ( 1 ) : 62.2665s for 8192 events => throughput is 1.32E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +54,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 100.8250s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5356s - [COUNTERS] Fortran MEs ( 1 ) : 100.2893s for 8192 events => throughput is 8.17E+01 events/s + [COUNTERS] PROGRAM TOTAL : 62.6313s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3935s + [COUNTERS] Fortran MEs ( 1 ) : 62.2378s for 8192 events => throughput is 1.32E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +79,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.284e-07 [2.2842858527333038E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1009.3485s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5474s - [COUNTERS] Fortran MEs ( 1 ) : 1004.8011s for 81920 events => throughput is 8.15E+01 events/s + [COUNTERS] PROGRAM TOTAL : 625.5755s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1317s + [COUNTERS] Fortran MEs ( 1 ) : 622.4438s for 81920 events => throughput is 1.32E+02 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +104,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729949E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 122.6272s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5269s - [COUNTERS] CudaCpp MEs ( 2 ) : 121.8976s for 8192 events => throughput is 6.72E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2027s + [COUNTERS] PROGRAM TOTAL : 99.4442s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4474s + [COUNTERS] CudaCpp MEs ( 2 ) : 98.8367s for 8192 events => throughput is 8.29E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1601s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282475E-007) differ by less than 3E-14 (2.4424906541753444e-15) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019835729949E-007) differ by less than 3E-14 (3.552713678800501e-15) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +139,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633775E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842858527333072E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1215.7257s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3426s - [COUNTERS] CudaCpp MEs ( 2 ) : 1211.1771s for 81920 events => throughput is 6.76E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2060s + [COUNTERS] PROGRAM TOTAL : 986.3468s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0893s + [COUNTERS] CudaCpp MEs ( 2 ) : 983.1056s for 81920 events => throughput is 8.33E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1519s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633775E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858527333072E-007) differ by less than 3E-14 (1.5543122344752192e-15) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.947835e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.044799e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.953028e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.046587e+02 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +184,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729943E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 65.0412s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5159s - [COUNTERS] CudaCpp MEs ( 2 ) : 64.4246s for 8192 events => throughput is 1.27E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1007s + [COUNTERS] PROGRAM TOTAL : 49.4765s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4488s + [COUNTERS] CudaCpp MEs ( 2 ) : 48.8907s for 8192 events => throughput is 1.68E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1370s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019835729943E-007) differ by less than 3E-14 (3.3306690738754696e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +219,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842858527333069E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 643.1942s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3363s - [COUNTERS] CudaCpp MEs ( 2 ) : 638.7557s for 81920 events => throughput is 1.28E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1021s + [COUNTERS] PROGRAM TOTAL : 494.5399s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0807s + [COUNTERS] CudaCpp MEs ( 2 ) : 491.3813s for 81920 events => throughput is 1.67E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0779s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858527333069E-007) differ by less than 3E-14 (1.3322676295501878e-15) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.582676e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.084386e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.328359e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.067891e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +264,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729933E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 28.5049s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5111s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.9480s for 8192 events => throughput is 2.93E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0458s + [COUNTERS] PROGRAM TOTAL : 22.9679s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4156s + [COUNTERS] CudaCpp MEs ( 2 ) : 22.4381s for 8192 events => throughput is 3.65E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1142s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019835729933E-007) differ by less than 3E-14 (2.886579864025407e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,309 +299,45 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842858527333072E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 284.0441s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3313s - [COUNTERS] CudaCpp MEs ( 2 ) : 279.6676s for 81920 events => throughput is 2.93E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0452s + [COUNTERS] PROGRAM TOTAL : 226.5648s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1182s + [COUNTERS] CudaCpp MEs ( 2 ) : 223.4112s for 81920 events => throughput is 3.67E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0354s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858527333072E-007) differ by less than 3E-14 (1.5543122344752192e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.544656e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.429932e+02 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 25.4242s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5110s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.8732s for 8192 events => throughput is 3.29E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0401s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 258.4274s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3572s - [COUNTERS] CudaCpp MEs ( 2 ) : 254.0306s for 81920 events => throughput is 3.22E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0397s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.924332e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.943882e+02 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 26.1506s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5251s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.5777s for 8192 events => throughput is 3.20E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0477s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 258.8965s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4685s - [COUNTERS] CudaCpp MEs ( 2 ) : 254.3821s for 81920 events => throughput is 3.22E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0458s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.459682e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.599435e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.450518e+02 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 3.2103s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0334s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1011s for 8192 events => throughput is 7.44E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 1.0758s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561551282475E-007) differ by less than 3E-14 (2.4424906541753444e-15) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633791E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 16.9165s - [COUNTERS] Fortran Overhead ( 0 ) : 4.9531s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.8819s for 81920 events => throughput is 7.53E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 1.0816s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713115633791E-007) differ by less than 3E-14 (2.220446049250313e-15) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.508582e+03 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.240924e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.270483e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.589702e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.287293e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.538954e+02 ) sec^-1 -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.424280e+03 ) sec^-1 +*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.273685e+03 ) sec^-1 +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.243085e+03 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (gg_ttggg is not supported on hip #933) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 2a956cd657..381d54d555 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,41 +1,21 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - -make USEBUILDDIR=1 BACKEND=cuda - -make USEBUILDDIR=1 BACKEND=cppnone - - -make USEBUILDDIR=1 BACKEND=cppsse4 - -make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. 
+ OMP_NUM_THREADS= -DATE: 2024-09-18_15:14:59 +DATE: 2024-09-18_22:11:08 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +29,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 100.8383s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5370s - [COUNTERS] Fortran MEs ( 1 ) : 100.3014s for 8192 events => throughput is 8.17E+01 events/s + [COUNTERS] PROGRAM TOTAL : 62.6606s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3995s + [COUNTERS] Fortran MEs ( 1 ) : 62.2611s for 8192 events => throughput is 1.32E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +54,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 100.6800s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5304s - [COUNTERS] Fortran MEs ( 1 ) : 100.1496s for 8192 events => throughput is 8.18E+01 events/s + [COUNTERS] PROGRAM TOTAL : 62.7300s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4148s + [COUNTERS] Fortran MEs ( 1 ) : 62.3153s for 8192 events => throughput is 1.31E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +79,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.284e-07 [2.2842858527333038E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1006.5135s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5331s - [COUNTERS] Fortran MEs ( 1 ) : 1001.9804s for 81920 events => throughput is 8.18E+01 events/s + [COUNTERS] PROGRAM TOTAL : 626.2858s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1968s + [COUNTERS] Fortran MEs ( 1 ) : 623.0890s for 81920 events => throughput is 1.31E+02 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,25 +104,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575849446922190E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575308139230432E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 113.7634s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5261s - [COUNTERS] CudaCpp MEs ( 2 ) : 113.0501s for 8192 events => throughput is 7.25E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1871s + [COUNTERS] PROGRAM TOTAL : 101.5057s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4418s + [COUNTERS] CudaCpp MEs ( 2 ) : 100.8607s for 8192 events => throughput is 8.12E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2031s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575849446922190E-007) differ by less than 4E-4 (0.00013947977747852391) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3575308139230432E-007) differ by less than 4E-4 (0.0001395002856556804) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,39 +140,39 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2845954405861011E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.285e-07 [2.2846099389242361E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1135.0851s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4478s - [COUNTERS] CudaCpp MEs ( 2 ) : 1130.4514s for 81920 events => throughput is 7.25E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1858s + [COUNTERS] PROGRAM TOTAL : 1010.3499s + [COUNTERS] Fortran Overhead ( 0 ) : 3.2645s + [COUNTERS] CudaCpp MEs ( 2 ) : 1006.8322s for 81920 events => throughput is 8.14E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2532s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845954405861011E-007) differ by less than 4E-4 (0.00014189602657355138) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2846099389242361E-007) differ by less than 4E-4 (0.00014187637267237818) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.611057e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.652228e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.618948e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.705470e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -206,25 +186,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575845178322101E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575303913232094E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 28.2816s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5245s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.7095s for 8192 events => throughput is 2.96E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0476s + [COUNTERS] PROGRAM TOTAL : 23.5925s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4382s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.0566s for 8192 events => throughput is 3.55E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0978s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845178322101E-007) differ by less than 4E-4 (0.0001392986940575991) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3575303913232094E-007) differ by less than 4E-4 (0.00013932100537483727) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -242,39 +222,39 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2845949484525033E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.285e-07 [2.2846096068245575E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 283.1165s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4533s - [COUNTERS] CudaCpp MEs ( 2 ) : 278.6156s for 81920 events => throughput is 2.94E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0477s + [COUNTERS] PROGRAM TOTAL : 233.7583s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1877s + [COUNTERS] CudaCpp MEs ( 2 ) : 230.5373s for 81920 events => throughput is 3.55E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0333s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845949484525033E-007) differ by less than 4E-4 (0.00014168058211416756) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2846096068245575E-007) differ by less than 4E-4 (0.00014173098820635666) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.409666e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.323061e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.406819e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.315479e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -288,25 +268,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575304434295576E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 14.8123s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5276s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.2607s for 8192 events => throughput is 5.74E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0240s + [COUNTERS] PROGRAM TOTAL : 11.7038s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4244s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.2050s for 8192 events => throughput is 7.31E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0744s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845169411084E-007) differ by less than 4E-4 (0.0001392983160326544) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3575304434295576E-007) differ by less than 4E-4 (0.0001393431105436438) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -324,314 +304,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.285e-07 [2.2846087407964351E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 146.8484s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4408s - [COUNTERS] CudaCpp MEs ( 2 ) : 142.3838s for 81920 events => throughput is 5.75E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0238s + [COUNTERS] PROGRAM TOTAL : 117.0635s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1472s + [COUNTERS] CudaCpp MEs ( 2 ) : 113.8984s for 81920 events => throughput is 7.19E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0179s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007) differ by less than 4E-4 (0.0001412980864952118) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2846087407964351E-007) differ by less than 4E-4 (0.00014135186397323807) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.847317e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.876249e+02 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 13.2590s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5224s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.7153s for 8192 events => throughput is 6.44E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0212s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845169411084E-007) differ by less than 4E-4 (0.0001392983160326544) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 130.9182s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4397s - [COUNTERS] CudaCpp MEs ( 2 ) : 126.4573s for 81920 events => throughput is 6.48E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0212s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007) differ by less than 4E-4 (0.0001412980864952118) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.748756e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.729474e+02 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! 
Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575850859831750E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 13.3128s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5317s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.7560s for 8192 events => throughput is 6.42E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0251s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575850859831750E-007) differ by less than 4E-4 (0.00013953971621538663) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2845946568145136E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 131.9849s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4720s - [COUNTERS] CudaCpp MEs ( 2 ) : 127.4891s for 81920 events => throughput is 6.43E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0238s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845946568145136E-007) differ by less than 4E-4 (0.00014155290989403824) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.915808e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.316164e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.934421e+02 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575862304433055E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 2.1536s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0553s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5409s for 8192 events => throughput is 1.51E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.5574s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3575862304433055E-007) differ by less than 4E-4 (0.00014002522141920437) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2845959888250639E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 10.9466s - [COUNTERS] Fortran Overhead ( 0 ) : 5.0484s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.3690s for 81920 events => throughput is 1.53E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.5292s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2845959888250639E-007) differ by less than 4E-4 (0.0001421360326359089) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.538905e+04 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.534050e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.137147e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.187870e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.144301e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.950771e+02 ) sec^-1 -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.161388e+04 ) sec^-1 +*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.156097e+04 ) sec^-1 +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.971114e+03 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (gg_ttggg is not supported on hip #933) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index e04ca3f869..22716a3d50 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,41 +1,21 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - - -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone - -make USEBUILDDIR=1 BACKEND=cppsse4 - - -make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. 
+ OMP_NUM_THREADS= -DATE: 2024-09-18_16:14:25 +DATE: 2024-09-18_22:52:26 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +29,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 101.0730s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5330s - [COUNTERS] Fortran MEs ( 1 ) : 100.5400s for 8192 events => throughput is 8.15E+01 events/s + [COUNTERS] PROGRAM TOTAL : 62.5715s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3992s + [COUNTERS] Fortran MEs ( 1 ) : 62.1724s for 8192 events => throughput is 1.32E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +54,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 100.8963s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5347s - [COUNTERS] Fortran MEs ( 1 ) : 100.3617s for 8192 events => throughput is 8.16E+01 events/s + [COUNTERS] PROGRAM TOTAL : 62.4809s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4131s + [COUNTERS] Fortran MEs ( 1 ) : 62.0678s for 8192 events => throughput is 1.32E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +79,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.284e-07 [2.2842858527333038E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1008.5494s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5481s - [COUNTERS] Fortran MEs ( 1 ) : 1004.0013s for 81920 events => throughput is 8.16E+01 events/s + [COUNTERS] PROGRAM TOTAL : 625.4473s + [COUNTERS] Fortran Overhead ( 0 ) : 3.2009s + [COUNTERS] Fortran MEs ( 1 ) : 622.2463s for 81920 events => throughput is 1.32E+02 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +104,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561678995975E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572019963403161E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 119.7272s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5266s - [COUNTERS] CudaCpp MEs ( 2 ) : 118.9906s for 8192 events => throughput is 6.88E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2100s + [COUNTERS] PROGRAM TOTAL : 98.0342s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4335s + [COUNTERS] CudaCpp MEs ( 2 ) : 97.3857s for 8192 events => throughput is 8.41E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2150s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561678995975E-007) differ by less than 2E-4 (5.417890580616813e-09) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019963403161E-007) differ by less than 2E-4 (5.416306958494488e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +139,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713238614534E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842858650293213E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1235.8333s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4485s - [COUNTERS] CudaCpp MEs ( 2 ) : 1231.1755s for 81920 events => throughput is 6.65E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2092s + [COUNTERS] PROGRAM TOTAL : 983.7723s + [COUNTERS] Fortran Overhead ( 0 ) : 3.2081s + [COUNTERS] CudaCpp MEs ( 2 ) : 980.4121s for 81920 events => throughput is 8.36E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1521s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713238614534E-007) differ by less than 2E-4 (5.38380851011766e-09) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858650293213E-007) differ by less than 2E-4 (5.3828717039294816e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.603593e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.065759e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.563954e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.051001e+02 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +184,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561701257335E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572019985761424E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 64.0094s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5236s - [COUNTERS] CudaCpp MEs ( 2 ) : 63.3817s for 8192 events => throughput is 1.29E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1041s + [COUNTERS] PROGRAM TOTAL : 47.7625s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4830s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.2064s for 8192 events => throughput is 1.74E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0730s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561701257335E-007) differ by less than 2E-4 (6.3622664914220195e-09) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019985761424E-007) differ by less than 2E-4 (6.364815563486559e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +219,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713242471448E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842858654239918E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 636.9448s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4347s - [COUNTERS] CudaCpp MEs ( 2 ) : 632.4070s for 81920 events => throughput is 1.30E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1031s + [COUNTERS] PROGRAM TOTAL : 475.7938s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1594s + [COUNTERS] CudaCpp MEs ( 2 ) : 472.5616s for 81920 events => throughput is 1.73E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0729s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713242471448E-007) differ by less than 2E-4 (5.552655002460938e-09) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858654239918E-007) differ by less than 2E-4 (5.555647941690722e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.548168e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.177595e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.548990e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.205740e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +264,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572019990398792E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 28.2427s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5259s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.6712s for 8192 events => throughput is 2.96E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0456s + [COUNTERS] PROGRAM TOTAL : 22.3346s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4024s + [COUNTERS] CudaCpp MEs ( 2 ) : 21.8532s for 8192 events => throughput is 3.75E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0790s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019990398792E-007) differ by less than 2E-4 (6.5615473054947415e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,309 +299,45 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842858652988808E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 285.3174s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4390s - [COUNTERS] CudaCpp MEs ( 2 ) : 280.8328s for 81920 events => throughput is 2.92E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0456s + [COUNTERS] PROGRAM TOTAL : 219.1677s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1155s + [COUNTERS] CudaCpp MEs ( 2 ) : 216.0189s for 81920 events => throughput is 3.79E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0333s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858652988808E-007) differ by less than 2E-4 (5.500877753306099e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.559056e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.558709e+02 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 25.3100s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5275s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.7433s for 8192 events => throughput is 3.31E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0391s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 250.0775s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4353s - [COUNTERS] CudaCpp MEs ( 2 ) : 245.6035s for 81920 events => throughput is 3.34E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0387s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.143810e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.119192e+02 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 25.7695s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5251s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.1974s for 8192 events => throughput is 3.25E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0470s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 262.3166s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4708s - [COUNTERS] CudaCpp MEs ( 2 ) : 257.7997s for 81920 events => throughput is 3.18E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0461s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.519965e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.927048e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.518227e+02 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561518129465E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 2.7761s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0249s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8762s for 8192 events => throughput is 9.35E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.8750s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561518129465E-007) differ by less than 2E-4 (1.4064212017217415e-09) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713109538129E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 14.4588s - [COUNTERS] Fortran Overhead ( 0 ) : 4.9583s - [COUNTERS] CudaCpp MEs ( 2 ) : 8.6324s for 81920 events => throughput is 9.49E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.8681s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713109538129E-007) differ by less than 2E-4 (2.668514298420632e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.423002e+03 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.078690e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.104813e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.152942e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.106947e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.877390e+02 ) sec^-1 -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.110409e+04 ) sec^-1 +*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.106917e+04 ) sec^-1 +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.676393e+03 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (gg_ttggg is not supported on hip #933) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 13fa996bcb..658b9ffa5c 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - -make USEBUILDDIR=1 BACKEND=cuda - -make USEBUILDDIR=1 BACKEND=cppsse4 +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + +make USEBUILDDIR=1 BACKEND=cppsse4 make 
USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-18_13:55:45 +DATE: 2024-09-18_21:22:04 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.5290s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4573s - [COUNTERS] Fortran MEs ( 1 ) : 0.0716s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5014s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4508s + [COUNTERS] Fortran MEs ( 1 ) : 0.0507s for 8192 events => throughput is 1.62E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4789s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4070s - [COUNTERS] Fortran MEs ( 1 ) : 0.0719s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3734s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3202s + [COUNTERS] Fortran MEs ( 1 ) : 0.0532s for 8192 events => throughput is 1.54E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 + [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.5996s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8818s - [COUNTERS] Fortran MEs ( 1 ) : 0.7178s for 81920 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9115s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3930s + [COUNTERS] Fortran MEs ( 1 ) : 0.5185s for 81920 events => throughput is 1.58E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737132] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4885s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4104s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 8192 events => throughput is 1.06E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.3811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3191s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0617s for 8192 events => throughput is 1.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737132) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701704456871) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771376575784] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.6882s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9084s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7791s for 81920 events => throughput is 1.05E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 2.0389s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4071s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6315s for 81920 events => throughput is 1.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427598) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376575784) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.061513e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.323639e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.071810e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.328591e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737170] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701704456874] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4501s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4063s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0432s for 8192 events => throughput is 1.90E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.3556s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3197s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0356s for 8192 events => throughput is 2.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737170) differ by less than 3E-14 (2.220446049250313e-15) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701704456874) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427590] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.3263s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8964s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4293s for 81920 events => throughput is 1.91E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.7525s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3982s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3540s for 81920 events => throughput is 2.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427590) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376575781) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.907415e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.273310e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.918851e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.290081e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4325s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.33E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.3485s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3287s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0195s for 8192 events => throughput is 4.19E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701704456871) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,120 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771376575775] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1464s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8971s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2488s for 81920 events => throughput is 3.29E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.6011s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4113s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1894s for 81920 events => throughput is 4.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376575775) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.325644e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.474030e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.338006e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4312s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0217s for 8192 events => throughput is 3.78E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1356s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9099s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2252s for 81920 events => throughput is 3.64E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.484493e+05 ) sec^-1 -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.464520e+05 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.541284e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,184 +370,32 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4489s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4137s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0347s for 8192 events => throughput is 2.36E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.2441s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9069s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3367s for 81920 events => throughput is 2.43E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.393978e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.414185e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737173] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.8542s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8498s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.55E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20313504505737126) and cuda (0.20313504505737173) differ by less than 3E-14 (2.220446049250313e-15) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.3403s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3289s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0103s for 81920 events => throughput is 7.96E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21095842877427595) and cuda (0.21095842877427598) differ by less than 3E-14 (2.220446049250313e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.881632e+06 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.254031e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.277303e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.101959e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.239939e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.286389e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.254121e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.640364e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** - -TEST COMPLETED +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Memory access fault by GPU node-4 (Agent handle: 0x677d050) on address 0x1493b7ab5000. Reason: Unknown. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x14964ec162e2 in ??? +#1 0x14964ec15475 in ??? +#2 0x14964ebe590f in ??? +#3 0x14964e886d2b in ??? +#4 0x14964e8883e4 in ??? +#5 0x149644aeed1b in ??? +#6 0x149644ae8bc8 in ??? +#7 0x149644a9a9e6 in ??? +#8 0x14964ebd96e9 in ??? +#9 0x14964e95450e in ??? +#10 0xffffffffffffffff in ??? +./madX.sh: line 400: 111168 Aborted (core dumped) $timecmd $cmd < ${tmpin} > ${tmp} +ERROR! ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed + PDF set = nn23lo1 + alpha_s(Mz)= 0.1300 running at 2 loops. + alpha_s(Mz)= 0.1300 running at 2 loops. 
+ Renormalization scale set on event-by-event basis + Factorization scale set on event-by-event basis + + + getting user params +Enter number of events and max and min iterations: + Number of events and iterations 8192 1 1 diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 0c2abc603a..1fc413ce73 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-18_13:56:17 +DATE: 2024-09-18_21:22:17 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.5308s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4584s - [COUNTERS] Fortran MEs ( 1 ) : 0.0725s for 8192 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4108s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3577s + [COUNTERS] Fortran MEs ( 1 ) : 0.0532s for 8192 events => throughput is 1.54E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4871s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4149s - [COUNTERS] Fortran MEs ( 1 ) : 0.0721s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3742s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3210s + [COUNTERS] Fortran MEs ( 1 ) : 0.0532s for 8192 events => throughput is 1.54E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 + [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.6180s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8993s - [COUNTERS] Fortran MEs ( 1 ) : 0.7187s for 81920 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9319s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4022s + [COUNTERS] Fortran MEs ( 1 ) : 0.5297s for 81920 events => throughput is 1.55E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313506133732837] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313702859087712] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4837s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4096s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0735s for 8192 events => throughput is 1.11E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.3759s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3186s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0570s for 8192 events => throughput is 1.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313506133732837) differ by less than 4E-4 (8.014351782215101e-08) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313702859087712) differ by less than 4E-4 (5.6840001816382824e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842907143103] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095770771365008] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.6552s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9225s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7321s for 81920 events => throughput is 1.12E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 2.2457s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6017s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6436s for 81920 events => throughput is 1.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842907143103) differ by less than 4E-4 (1.4085954624931674e-09) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095770771365008) differ by less than 4E-4 (2.86887245071199e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.127783e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.523849e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.135487e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.477916e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313502997679400] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313700465139972] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4362s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4092s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 8192 events => throughput is 3.07E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3480s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3260s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0219s for 8192 events => throughput is 3.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502997679400) differ by less than 4E-4 (7.423917058879681e-08) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313700465139972) differ by less than 4E-4 (6.100891492000216e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095839656505114] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095768752291760] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1752s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9071s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2678s for 81920 events => throughput is 3.06E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.6249s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4079s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2168s for 81920 events => throughput is 3.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839656505114) differ by less than 4E-4 (1.5268043562777223e-07) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095768752291760) differ by less than 4E-4 (1.2439858076973564e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.031782e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.985922e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.033396e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.995161e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313700354235445] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4208s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4072s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3351s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3241s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0109s for 8192 events => throughput is 7.55E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502619857851) differ by less than 4E-4 (9.283869628617936e-08) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313700354235445) differ by less than 4E-4 (6.646850714275843e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095768538537163] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.0338s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8997s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1338s for 81920 events => throughput is 6.12E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.5138s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4058s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1078s for 81920 events => throughput is 7.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ by less than 4E-4 (1.6423004467469582e-07) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095768538537163) differ by less than 4E-4 (1.3453116110007102e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.165970e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.197191e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4217s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4091s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.65E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502619857851) differ by less than 4E-4 (9.283869628617936e-08) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.0322s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9087s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1232s for 81920 events => throughput is 6.65E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ by less than 4E-4 (1.6423004467469582e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.618658e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.115197e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.353403e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313505300145301] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4238s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4063s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.78E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.940855e+05 ) sec^-1 -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313505300145301) differ by less than 4E-4 (3.910739154733278e-08) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842133012335] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.0846s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9123s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1719s for 81920 events => throughput is 4.77E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842133012335) differ by less than 4E-4 (3.528729641821826e-08) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.731547e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.814682e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,104 +370,32 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313508590887899] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.8504s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8464s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.20313504505737126) and cuda (0.20313508590887899) differ by less than 4E-4 (2.011051698502797e-07) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095846337765808] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.3852s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3752s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 8.98E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.21095842877427595) and cuda (0.21095846337765808) differ by less than 4E-4 (1.640293887383848e-07) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.088649e+06 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.406235e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.833815e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.147443e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.839780e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.591152e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.570167e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.189550e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** - -TEST COMPLETED +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Memory access fault by GPU node-4 (Agent handle: 0x677d070) on address 0x14fd9c0ff000. Reason: Unknown. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x15003322b2e2 in ??? +#1 0x15003322a475 in ??? +#2 0x1500331fa90f in ??? +#3 0x150032e9bd2b in ??? +#4 0x150032e9d3e4 in ??? +#5 0x150029103d1b in ??? +#6 0x1500290fdbc8 in ??? +#7 0x1500290af9e6 in ??? +#8 0x1500331ee6e9 in ??? +#9 0x150032f6950e in ??? +#10 0xffffffffffffffff in ??? +./madX.sh: line 400: 112178 Aborted $timecmd $cmd < ${tmpin} > ${tmp} +ERROR! ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed + PDF set = nn23lo1 + alpha_s(Mz)= 0.1300 running at 2 loops. + alpha_s(Mz)= 0.1300 running at 2 loops. 
+ Renormalization scale set on event-by-event basis + Factorization scale set on event-by-event basis + + + getting user params +Enter number of events and max and min iterations: + Number of events and iterations 8192 1 1 diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index d3b173c725..9dcc27eea4 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-18_13:56:48 +DATE: 2024-09-18_21:22:30 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.5250s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4531s - [COUNTERS] Fortran MEs ( 1 ) : 0.0719s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4947s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4413s + [COUNTERS] Fortran MEs ( 1 ) : 0.0534s for 8192 events => throughput is 1.54E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4812s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s - [COUNTERS] Fortran MEs ( 1 ) : 0.0722s for 8192 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3757s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3225s + [COUNTERS] Fortran MEs ( 1 ) : 0.0532s for 8192 events => throughput is 1.54E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 + [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.6071s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8898s - [COUNTERS] Fortran MEs ( 1 ) : 0.7173s for 81920 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9472s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4177s + [COUNTERS] Fortran MEs ( 1 ) : 0.5295s for 81920 events => throughput is 1.55E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504495344831] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701694845307] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4867s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4094s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0766s for 8192 events => throughput is 1.07E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.3887s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3249s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0634s for 8192 events => throughput is 1.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504495344831) differ by less than 2E-4 (5.115954326839756e-10) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701694845307) differ by less than 2E-4 (4.731567360138911e-10) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771376532396] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.6750s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9006s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7737s for 81920 events => throughput is 1.06E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 2.0464s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4147s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6312s for 81920 events => throughput is 1.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ by less than 2E-4 (3.982036922423049e-12) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376532396) differ by less than 2E-4 (2.05657713081564e-12) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.066197e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.323725e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.081547e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.327049e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504495344833] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701694845307] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4594s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4139s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0450s for 8192 events => throughput is 1.82E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.3610s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3245s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0361s for 8192 events => throughput is 2.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504495344833) differ by less than 2E-4 (5.115952106393706e-10) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701694845307) differ by less than 2E-4 (4.731567360138911e-10) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771376532396] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.3287s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9003s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4278s for 81920 events => throughput is 1.91E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.7706s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4098s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3605s for 81920 events => throughput is 2.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ by less than 2E-4 (3.982036922423049e-12) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376532396) differ by less than 2E-4 (2.05657713081564e-12) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.915335e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.284786e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.909065e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.391017e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701710149187] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4419s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4163s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 8192 events => throughput is 3.26E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.3417s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3230s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 8192 events => throughput is 4.43E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701710149187) differ by less than 2E-4 (2.8022051345999444e-10) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771374576316] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1482s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9012s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2465s for 81920 events => throughput is 3.32E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.5974s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4135s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1836s for 81920 events => throughput is 4.46E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771374576316) differ by less than 2E-4 (9.478029472376193e-11) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.313762e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.538844e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.370922e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4314s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4086s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0223s for 8192 events => throughput is 3.67E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1188s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9005s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2179s for 81920 events => throughput is 3.76E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.747505e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.784395e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4497s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4136s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0355s for 8192 events => throughput is 2.31E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.558820e+05 ) sec^-1 -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.2638s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9140s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3492s for 81920 events => throughput is 2.35E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.324769e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.345529e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,104 +370,32 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504512110778] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.8545s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8502s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.20313504505737126) and cuda (0.20313504512110778) differ by less than 2E-4 (3.1376434783680907e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842873460982] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.3685s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3565s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 81920 events => throughput is 7.58E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (0.21095842877427595) and cuda (0.21095842873460982) differ by less than 2E-4 (1.8802814860663375e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.938068e+06 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.128844e+06 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.264635e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.049713e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.259722e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.250826e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.279255e+07 ) sec^-1 - -*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.647946e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** - -TEST COMPLETED +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Memory access fault by GPU node-4 (Agent handle: 0x677d050) on address 0x1504b7d35000. Reason: Unknown. + +Program received signal SIGABRT: Process abort signal. + +Backtrace for this error: +#0 0x15074eea52e2 in ??? +#1 0x15074eea4475 in ??? +#2 0x15074ee7490f in ??? +#3 0x15074eb15d2b in ??? +#4 0x15074eb173e4 in ??? +#5 0x150744d7dd1b in ??? +#6 0x150744d77bc8 in ??? +#7 0x150744d299e6 in ??? +#8 0x15074ee686e9 in ??? +#9 0x15074ebe350e in ??? +#10 0xffffffffffffffff in ??? +./madX.sh: line 400: 113249 Aborted $timecmd $cmd < ${tmpin} > ${tmp} +ERROR! ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed + PDF set = nn23lo1 + alpha_s(Mz)= 0.1300 running at 2 loops. + alpha_s(Mz)= 0.1300 running at 2 loops. 
+ Renormalization scale set on event-by-event basis + Factorization scale set on event-by-event basis + + + getting user params +Enter number of events and max and min iterations: + Number of events and iterations 8192 1 1 diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index fad5d1a64f..3dabe0755c 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-18_17:32:01 +DATE: 2024-09-18_23:40:16 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9760s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9274s - [COUNTERS] Fortran MEs ( 1 ) : 0.0485s for 8192 events => throughput is 1.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.1585s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1221s + [COUNTERS] Fortran MEs ( 1 ) : 0.0364s for 8192 events => throughput is 2.25E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4561s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4081s - [COUNTERS] Fortran MEs ( 1 ) : 0.0480s for 8192 events => throughput is 1.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7186s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6822s + [COUNTERS] Fortran MEs ( 1 ) : 0.0364s for 8192 events => throughput is 2.25E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 + [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0599s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5836s - [COUNTERS] Fortran MEs ( 1 ) : 0.4763s for 81920 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 5.2530s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8813s + [COUNTERS] Fortran MEs ( 1 ) : 0.3717s for 81920 events => throughput is 2.20E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755170] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755334] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4589s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4083s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0501s for 8192 events => throughput is 1.64E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.7307s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6898s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755170) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081479755334) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865325] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0776s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5685s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5087s for 81920 events => throughput is 1.61E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 5.0375s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6333s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4039s for 81920 events => throughput is 2.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) +OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713375865325) differ by less than 3E-14 (1.1102230246251565e-14) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.656917e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.067860e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.653176e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.073882e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755347] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4352s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4071s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0277s for 8192 events => throughput is 2.96E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.7076s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6838s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0235s for 8192 events => throughput is 3.48E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755183) differ by less than 3E-14 (0.0) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081479755347) differ by less than 3E-14 (8.881784197001252e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865338] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.8492s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5718s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2771s for 81920 events => throughput is 2.96E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 4.8916s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6566s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2348s for 81920 events => throughput is 3.49E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) +OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713375865338) differ by less than 3E-14 (1.0436096431476471e-14) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.911436e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.562430e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.988061e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.577295e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755325] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4258s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4084s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0170s for 8192 events => throughput is 4.81E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.7106s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6974s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755165) differ by less than 3E-14 (8.881784197001252e-16) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081479755325) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.7395s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5731s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1660s for 81920 events => throughput is 4.93E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 4.7585s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6314s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1269s for 81920 events => throughput is 6.46E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ by less than 3E-14 (9.325873406851315e-15) +OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713375865552) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.731125e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.942521e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' -DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 - [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4230s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4072s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.31E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755165) differ by less than 3E-14 (8.881784197001252e-16) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1 - [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.7429s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5869s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1556s for 81920 events => throughput is 5.26E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ by less than 3E-14 (9.325873406851315e-15) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.403364e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.541810e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.439647e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' -DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755179] fbridge_mode=1 - [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4355s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4114s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.46E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755179) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.596517e+05 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 - [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.8196s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5847s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2345s for 81920 events => throughput is 3.49E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.483959e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.537527e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755192] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755356] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.8564s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8526s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM 
TOTAL : 0.9989s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9846s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0078s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081479755192) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (2.0160081479755330) and hip (2.0160081479755356) differ by less than 3E-14 (1.3322676295501878e-15) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865294] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865352] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0272s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0173s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 9.01E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 5.0429s + [COUNTERS] Fortran Overhead ( 0 ) : 5.0165s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0192s for 81920 events => throughput is 4.27E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0072s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cuda (2.0336713375865294) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (2.0336713375865552) and hip (2.0336713375865352) differ by less than 3E-14 (9.880984919163893e-15) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.829708e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.356738e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.382767e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.379534e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.774663e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.584016e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.124992e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.300114e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.755835e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.592842e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.430950e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.330570e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.756916e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.584311e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.513302e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.664638e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 4984f73b96..c022668690 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-18_17:32:29 +DATE: 2024-09-18_23:40:55 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9597s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9125s - [COUNTERS] Fortran MEs ( 1 ) : 0.0473s for 8192 events => throughput is 1.73E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0731s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0367s + [COUNTERS] Fortran MEs ( 1 ) : 0.0364s for 8192 events => throughput is 2.25E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4554s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4080s - [COUNTERS] Fortran MEs ( 1 ) : 0.0474s for 8192 events => throughput is 1.73E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7254s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6890s + [COUNTERS] Fortran MEs ( 1 ) : 0.0364s for 8192 events => throughput is 2.25E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 + [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0424s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5675s - [COUNTERS] Fortran MEs ( 1 ) : 0.4749s for 81920 events => throughput is 1.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 5.0374s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6751s + [COUNTERS] Fortran MEs ( 1 ) : 0.3623s for 81920 events => throughput is 2.26E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,34 +124,34 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160406825242951] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160406546722180] fbridge_mode=1 [UNWEIGHT] Wrote 1653 events (found 1658 events) - [COUNTERS] PROGRAM TOTAL : 0.4547s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0470s for 8192 events => throughput is 1.74E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.7172s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6812s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0358s for 8192 events => throughput is 2.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160406825242951) differ by less than 4E-4 (1.6138103811513815e-05) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160406546722180) differ by less than 4E-4 (1.61242883456314e-05) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! 
-diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 +diff /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 7562,7575d7561 < 4 1 1E-03 0.1250010E+03 0.7546771E-02 0.1235066E+00 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.71320499473E+02 0.71320499473E+02 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.54771239790E+02 0.54771239790E+02 0.00000000000E+00 0. 1. -< 5 1 1 2 501 0 0.50303102232E+02 0.36190119942E+02 0.14973002893E+02 0.63925016162E+02 0.47000000000E+01 0. -1. -< -5 1 1 2 0 501 -0.50303102232E+02 -0.36190119942E+02 0.15762567893E+01 0.62166723101E+02 0.47000000000E+01 0. -1. +< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.71320499550E+02 0.71320499550E+02 0.00000000000E+00 0. 1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.54771239731E+02 0.54771239731E+02 0.00000000000E+00 0. 1. +< 5 1 1 2 501 0 0.50303102232E+02 0.36190119942E+02 0.14973002962E+02 0.63925016178E+02 0.47000000000E+01 0. -1. +< -5 1 1 2 0 501 -0.50303102232E+02 -0.36190119942E+02 0.15762568567E+01 0.62166723103E+02 0.47000000000E+01 0. -1. 
< < 0 0.12500099E+03 < 0 diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index e45c8953e0..088580d866 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-18_17:32:35 +DATE: 2024-09-18_23:41:04 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9574s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9096s - [COUNTERS] Fortran MEs ( 1 ) : 0.0478s for 8192 events => throughput is 1.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0767s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0402s + [COUNTERS] Fortran MEs ( 1 ) : 0.0365s for 8192 events => throughput is 2.25E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4551s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4079s - [COUNTERS] Fortran MEs ( 1 ) : 0.0472s for 8192 events => throughput is 1.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7095s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6731s + [COUNTERS] Fortran MEs ( 1 ) : 0.0364s for 8192 events => throughput is 2.25E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 + [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0377s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5636s - [COUNTERS] Fortran MEs ( 1 ) : 0.4741s for 81920 events => throughput is 1.73E+05 events/s + [COUNTERS] PROGRAM TOTAL : 5.0318s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6691s + [COUNTERS] Fortran MEs ( 1 ) : 0.3627s for 81920 events => throughput is 2.26E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,25 +124,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081964453331] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081964453460] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4555s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0511s for 8192 events => throughput is 1.60E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.7211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6802s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081964453331) differ by less than 2E-4 (2.4042469792817656e-08) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081964453460) differ by less than 2E-4 (2.4042468904639236e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,25 +160,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713843200420] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713843200616] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0631s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5575s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5052s for 81920 events => throughput is 1.62E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 5.0505s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6453s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4049s for 81920 events => throughput is 2.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713843200420) differ by less than 2E-4 (2.2979875113904313e-08) +OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713843200616) differ by less than 2E-4 (2.297987178323524e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -187,15 +187,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.544125e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.031830e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.558149e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.034344e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,25 +209,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081964453336] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081964453469] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4342s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4061s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0277s for 8192 events => throughput is 2.95E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.6983s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6744s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.46E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081964453336) differ by less than 2E-4 (2.404247001486226e-08) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081964453469) differ by less than 2E-4 (2.4042469348728446e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -245,25 +245,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713843200425] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713843200620] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.8495s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5723s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2768s for 81920 events => throughput is 2.96E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 5.1226s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8775s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2449s for 81920 events => throughput is 3.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713843200425) differ by less than 2E-4 (2.2979875335948918e-08) +OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713843200620) differ by less than 2E-4 (2.2979872005279844e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -272,15 +272,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.837690e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.527175e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.886878e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.537960e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,25 +294,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081962974865] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4268s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0174s for 8192 events => throughput is 4.71E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.6901s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6772s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0126s for 8192 events => throughput is 6.48E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962974745) differ by less than 2E-4 (2.3969127349587893e-08) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081962974865) differ by less than 2E-4 (2.3969126017320264e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -330,25 +330,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713836598834] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.7266s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1685s for 81920 events => throughput is 4.86E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 4.7431s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6181s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1248s for 81920 events => throughput is 6.57E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598665) differ by less than 2E-4 (2.265525278488667e-08) +OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713836598834) differ by less than 2E-4 (2.2655247899905362e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -357,187 +357,23 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.779100e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.788127e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 - [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4240s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4080s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0156s for 8192 events => throughput is 5.24E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962974745) differ by less than 2E-4 (2.3969127349587893e-08) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 - [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.7202s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5626s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1572s for 81920 events => throughput is 5.21E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598665) differ by less than 2E-4 (2.265525278488667e-08) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.210846e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.727444e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.166322e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.761728e+05 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081962970020] fbridge_mode=1 - [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4371s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4123s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.35E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962970020) differ by less than 2E-4 (2.3968893092529697e-08) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713836598515] fbridge_mode=1 - [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.8082s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5683s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2395s for 81920 events => throughput is 3.42E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598515) differ by less than 2E-4 (2.2655245235370103e-08) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.192275e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.206869e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -549,30 +385,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081483021330] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081483021464] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.8528s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8489s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 1.0475s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0334s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 8192 events => throughput is 1.26E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0077s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081483021330) differ by less than 2E-4 (1.6201062713605552e-10) +OK! xsec from fortran (2.0160081479755330) and hip (2.0160081483021464) differ by less than 2E-4 (1.6200996100224074e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -584,69 +420,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713380111449] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713380111582] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0087s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9990s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 81920 events => throughput is 9.13E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 4.9959s + [COUNTERS] Fortran Overhead ( 0 ) : 4.9691s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0192s for 81920 events => throughput is 4.27E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0076s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cuda (2.0336713380111449) differ by less than 2E-4 (2.0879298290310544e-10) +OK! 
xsec from fortran (2.0336713375865552) and hip (2.0336713380111582) differ by less than 2E-4 (2.0878654360956261e-10) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.939022e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.361816e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.244768e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.392956e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.749489e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.577296e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.094535e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.308106e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.768020e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.595061e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.372316e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.923460e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.761406e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.570017e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 
2.486762e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.696241e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index cf925a09c6..e6b5f839f6 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-18_17:35:48 +DATE: 2024-09-18_23:43:33 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.6643s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3661s - [COUNTERS] Fortran MEs ( 1 ) : 2.2982s for 8192 events => throughput is 3.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9139s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3589s + [COUNTERS] Fortran MEs ( 1 ) : 1.5550s for 8192 events => throughput is 5.27E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.6579s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3588s - [COUNTERS] Fortran MEs ( 1 ) : 2.2991s for 8192 events => throughput is 3.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.8679s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2870s + [COUNTERS] Fortran MEs ( 1 ) : 1.5810s for 8192 events => throughput is 5.18E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 25.0583s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0507s - [COUNTERS] Fortran MEs ( 1 ) : 23.0076s for 81920 events => throughput is 3.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.4649s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4800s + [COUNTERS] Fortran MEs ( 1 ) : 15.9850s for 81920 events => throughput is 5.12E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728557E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.8350s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3611s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4687s for 8192 events => throughput is 3.32E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0053s + [COUNTERS] PROGRAM TOTAL : 2.1861s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3259s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8564s for 8192 events => throughput is 4.41E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0037s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381610362728557E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898222E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 26.8636s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0489s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.8095s for 81920 events => throughput is 3.30E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s + [COUNTERS] PROGRAM TOTAL : 20.0082s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4968s + [COUNTERS] CudaCpp MEs ( 2 ) : 18.5079s for 81920 events => throughput is 4.43E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0035s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898148E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542926582898222E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.460402e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.607657e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.457448e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.561572e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728610E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.6570s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3616s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2927s for 8192 events => throughput is 6.34E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + [COUNTERS] PROGRAM TOTAL : 1.5197s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3171s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2005s for 8192 events => throughput is 6.82E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728610E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381610362728536E-007) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898191E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898275E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 15.0457s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0488s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.9941s for 81920 events => throughput is 6.30E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s + [COUNTERS] PROGRAM TOTAL : 11.0354s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4918s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.5415s for 81920 events => throughput is 8.59E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898191E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542926582898275E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.568599e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.858280e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.542585e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.830106e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728525E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9401s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3632s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5754s for 8192 events => throughput is 1.42E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 0.7418s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3011s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4395s for 8192 events => throughput is 1.86E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381610362728525E-007) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898233E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 7.7938s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0478s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7444s for 81920 events => throughput is 1.43E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 6.0773s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6733s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4029s for 81920 events => throughput is 1.86E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542926582898233E-007) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.465958e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.451297e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8791s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3648s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5129s for 8192 events => throughput is 1.60E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 7.1685s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0408s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.1262s for 81920 events => throughput is 1.60E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.660633e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.902213e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.663487e+04 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.0357s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3623s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6717s for 8192 events => throughput is 1.22E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.903178e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 8.7351s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0383s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.6950s for 81920 events => throughput is 1.22E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.242378e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.242909e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728578E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728514E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8480s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8085s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.13E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0197s + [COUNTERS] PROGRAM TOTAL : 0.7605s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6421s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0651s for 8192 events => throughput is 1.26E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0533s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610362728578E-007) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (7.6381610362728536E-007) and hip (7.6381610362728514E-007) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.6736s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4774s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1765s for 81920 events => throughput is 4.64E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0197s + [COUNTERS] PROGRAM TOTAL : 2.5715s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9128s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6057s for 81920 events => throughput is 1.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0530s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (7.6542926582898244E-007) and hip (7.6542926582898244E-007) differ by less than 3E-14 (0.0) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.229187e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.290124e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.527847e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.275778e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.819324e+05 ) sec^-1 +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.619452e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.226919e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.435666e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.844216e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.620393e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.225190e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.610894e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.847840e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.620701e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.681732e+05 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.954475e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index d625debf72..2091e4d6ea 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' 
make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-18_17:38:08 +DATE: 2024-09-18_23:45:20 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.6507s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3559s - [COUNTERS] Fortran MEs ( 1 ) : 2.2948s for 8192 events => throughput is 3.57E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.8612s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2852s + [COUNTERS] Fortran MEs ( 1 ) : 1.5759s for 8192 events => throughput is 5.20E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.6503s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3594s - [COUNTERS] Fortran MEs ( 1 ) : 2.2909s for 8192 events => throughput is 3.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.0803s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3796s + [COUNTERS] Fortran MEs ( 1 ) : 1.7007s for 8192 events => throughput is 4.82E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 25.0293s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0436s - [COUNTERS] Fortran MEs ( 1 ) : 22.9857s for 81920 events => throughput is 3.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.2537s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4920s + [COUNTERS] Fortran MEs ( 1 ) : 15.7617s for 81920 events => throughput is 5.20E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381686438954397E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381684214474469E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.7985s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3626s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4310s for 8192 events => throughput is 3.37E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0049s + [COUNTERS] PROGRAM TOTAL : 2.0939s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3042s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.7855s for 8192 events => throughput is 4.59E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0042s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381686438954397E-007) differ by less than 4E-4 (9.960018576560259e-07) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381684214474469E-007) differ by less than 4E-4 (9.668786189465095e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542978900095690E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542976447681378E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 26.3775s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0419s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.3307s for 81920 events => throughput is 3.37E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s + [COUNTERS] PROGRAM TOTAL : 19.5765s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4924s + [COUNTERS] CudaCpp MEs ( 2 ) : 18.0804s for 81920 events => throughput is 4.53E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0036s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542978900095690E-007) differ by less than 4E-4 (6.835014008110818e-07) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542976447681378E-007) differ by less than 4E-4 (6.514616746056134e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.486852e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.699917e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.494086e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.716433e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381671483253128E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381673102586798E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.0412s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3621s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6777s for 8192 events => throughput is 1.21E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 0.7975s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3062s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4895s for 8192 events => throughput is 1.67E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381671483253128E-007) differ by less than 4E-4 (8.001994753481512e-07) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381673102586798E-007) differ by less than 4E-4 (8.214000459805249e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542962735029303E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542965612263376E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 8.8470s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0545s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.7910s for 81920 events => throughput is 1.21E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 6.4322s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4898s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.9413s for 81920 events => throughput is 1.66E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542962735029303E-007) differ by less than 4E-4 (4.7231184874263477e-07) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542965612263376E-007) differ by less than 4E-4 (5.09901657563816e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.236746e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.671993e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.228135e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.674722e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381674937970992E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6595s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3662s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2926s for 8192 events => throughput is 2.80E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 0.7009s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4698s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2305s for 8192 events => throughput is 3.55E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381672175647812E-007) differ by less than 4E-4 (8.092644150359263e-07) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381674937970992E-007) differ by less than 4E-4 (8.454291831050398e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542993199513089E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 5.0070s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0356s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.9705s for 81920 events => throughput is 2.76E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 3.7840s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4892s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2941s for 81920 events => throughput is 3.57E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007) differ by less than 4E-4 (8.245628615455303e-07) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542993199513089E-007) differ by less than 4E-4 (8.703170601975785e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.852598e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.877017e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6241s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3601s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2632s for 8192 events => throughput is 3.11E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381672175647812E-007) differ by less than 4E-4 (8.092644150359263e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 4.6833s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0329s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.6497s for 81920 events => throughput is 3.09E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007) differ by less than 4E-4 (8.245628615455303e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.188563e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.680556e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.189121e+04 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381686320975603E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6974s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3592s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3373s for 8192 events => throughput is 2.43E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381686320975603E-007) differ by less than 4E-4 (9.944572607611946e-07) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.684569e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6543004237976207E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 5.4012s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0331s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.3672s for 81920 events => throughput is 2.43E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6543004237976207E-007) differ by less than 4E-4 (1.014529774634454e-06) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.455496e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.453904e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381711031958629E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381687553340853E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8395s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8026s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0171s + [COUNTERS] PROGRAM TOTAL : 0.9693s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8677s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0479s for 8192 events => throughput is 1.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0538s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381711031958629E-007) differ by less than 4E-4 (1.3179773188376487e-06) +OK! xsec from fortran (7.6381610362728536E-007) and hip (7.6381687553340853E-007) differ by less than 4E-4 (1.0105915801972287e-06) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6543026921346333E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6543007309341497E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.6550s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4784s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1594s for 81920 events => throughput is 5.14E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0171s + [COUNTERS] PROGRAM TOTAL : 2.1839s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8129s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3184s for 81920 events => throughput is 2.57E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0527s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.6542926582898148E-007) and cuda (7.6543026921346333E-007) differ by less than 4E-4 (1.3108781262705094e-06) +OK! xsec from fortran (7.6542926582898244E-007) and hip (7.6543007309341497E-007) differ by less than 4E-4 (1.0546558233404113e-06) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.221158e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.348920e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.431078e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.428627e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.300814e+06 ) sec^-1 +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.015915e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.323922e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.017256e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.295837e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.015001e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.322906e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.020010e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.292673e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.013211e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.656202e+05 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.487524e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index e6874f3a32..a3e817e7ae 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + 
make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-18_17:40:03 +DATE: 2024-09-18_23:46:47 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.6497s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3549s - [COUNTERS] Fortran MEs ( 1 ) : 2.2948s for 8192 events => throughput is 3.57E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.8701s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2841s + [COUNTERS] Fortran MEs ( 1 ) : 1.5859s for 8192 events => throughput is 5.17E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.6518s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3572s - [COUNTERS] Fortran MEs ( 1 ) : 2.2945s for 8192 events => throughput is 3.57E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.8803s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2969s + [COUNTERS] Fortran MEs ( 1 ) : 1.5834s for 8192 events => throughput is 5.17E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 25.0481s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0523s - [COUNTERS] Fortran MEs ( 1 ) : 22.9958s for 81920 events => throughput is 3.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.4312s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4913s + [COUNTERS] Fortran MEs ( 1 ) : 15.9399s for 81920 events => throughput is 5.14E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608764955655E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608764955570E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.8581s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3622s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4908s for 8192 events => throughput is 3.29E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s + [COUNTERS] PROGRAM TOTAL : 2.1419s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3091s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8290s for 8192 events => throughput is 4.48E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0038s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608764955655E-007) differ by less than 2E-4 (2.0918293319738268e-08) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381608764955570E-007) differ by less than 2E-4 (2.0918293763827478e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542925018181681E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542925018181723E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 27.0498s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0486s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.9962s for 81920 events => throughput is 3.28E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s + [COUNTERS] PROGRAM TOTAL : 20.1982s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5029s + [COUNTERS] CudaCpp MEs ( 2 ) : 18.6915s for 81920 events => throughput is 4.38E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0037s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925018181681E-007) differ by less than 2E-4 (2.044233915476923e-08) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542925018181723E-007) differ by less than 2E-4 (2.0442339820903044e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.436817e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.596590e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.435512e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.600471e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608686521600E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608686521537E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.6335s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3586s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2723s for 8192 events => throughput is 6.44E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s + [COUNTERS] PROGRAM TOTAL : 1.2415s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3049s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9349s for 8192 events => throughput is 8.76E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608686521600E-007) differ by less than 2E-4 (2.1945164241365944e-08) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381608686521537E-007) differ by less than 2E-4 (2.194516446341055e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542924921991264E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542924921991233E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 14.8859s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0643s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.8189s for 81920 events => throughput is 6.39E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + [COUNTERS] PROGRAM TOTAL : 10.8267s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5226s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.3021s for 81920 events => throughput is 8.81E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542924921991264E-007) differ by less than 2E-4 (2.1699025132271288e-08) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542924921991233E-007) differ by less than 2E-4 (2.1699026797605825e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.796385e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.009209e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.784638e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.006276e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608826200382E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9347s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3638s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5694s for 8192 events => throughput is 1.44E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 0.7302s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2904s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4382s for 8192 events => throughput is 1.87E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381608826200382E-007) differ by less than 2E-4 (2.0116467158715068e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,120 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542925056010384E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 7.7335s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0485s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.6834s for 81920 events => throughput is 1.44E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [COUNTERS] PROGRAM TOTAL : 6.1080s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5753s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.5316s for 81920 events => throughput is 1.81E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542925056010384E-007) differ by less than 2E-4 (1.9948124929669575e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.478402e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.954394e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.467295e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8664s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3632s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5018s for 8192 events => throughput is 1.63E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.916441e+04 ) sec^-1 -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 7.0416s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0429s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.9973s for 81920 events => throughput is 1.64E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.693290e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.684165e+04 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610372590265E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.0424s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3606s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6800s for 8192 events => throughput is 1.20E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0019s + [COUNTERS] PROGRAM TOTAL : 0.7523s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6344s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0647s for 8192 events => throughput is 1.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0532s -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) +OK! xsec from fortran (7.6381610362728536E-007) and hip (7.6381610372590265E-007) differ by less than 2E-4 (1.2911138824733825e-10) -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -479,149 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926581386322E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 8.8421s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0543s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.7861s for 81920 events => throughput is 1.21E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.218603e+04 ) sec^-1 + [COUNTERS] PROGRAM TOTAL : 2.4759s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8191s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6025s for 81920 events => throughput is 1.36E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0543s -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.233808e+04 ) sec^-1 +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610372590318E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7998s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0197s for 8192 events => throughput is 4.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s +OK! xsec from fortran (7.6542926582898244E-007) and hip (7.6542926581386322E-007) differ by less than 2E-4 (1.9752643964920935e-11) -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610372590318E-007) differ by less than 2E-4 (1.2911138824733825e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926581386226E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.6799s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4834s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1767s for 81920 events => throughput is 4.64E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926581386226E-007) differ by less than 2E-4 (1.9752643964920935e-11) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.207155e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.295429e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.529302e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.278295e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.824963e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.606939e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.207520e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.436405e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.824989e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.607369e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.199605e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.609351e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.829686e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.602579e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.672241e+05 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.957889e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index a3ffe665a4..43f1f62670 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-18_17:34:28 +DATE: 2024-09-18_23:42:39 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.6936s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6849s - [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.41E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6314s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6253s + [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.35E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4210s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4125s - [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.64E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3272s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3218s + [COUNTERS] Fortran MEs ( 1 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6464s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5624s - [COUNTERS] Fortran MEs ( 1 ) : 0.0840s for 81920 events => throughput is 9.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2367s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1766s + [COUNTERS] Fortran MEs ( 1 ) : 0.0601s for 81920 events => throughput is 1.36E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,9 +124,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4282s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4193s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.61E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3821s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3744s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6471s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5643s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0824s for 81920 events => throughput is 9.94E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.2610s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0740s for 81920 events => throughput is 1.11E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207288) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.967649e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.160346e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.004982e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.172468e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4170s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4121s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.80E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3551s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3512s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0037s for 8192 events => throughput is 2.21E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6254s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5798s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0452s for 81920 events => throughput is 1.81E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.2267s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1909s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0356s for 81920 events => throughput is 2.30E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207288) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.903119e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.477602e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.966510e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.535827e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4203s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4171s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.84E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3494s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3470s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.72E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426120) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310722207294] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.5995s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5705s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0287s for 81920 events => throughput is 2.85E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.2174s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1961s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 81920 events => throughput is 3.90E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207294) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.156830e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.269475e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' -DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 - [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4170s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4140s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.00E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 - [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.5963s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5689s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 81920 events => throughput is 3.03E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.272274e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.300387e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.518220e+06 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' -DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 - [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4193s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4158s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.499613e+06 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 - [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6175s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5862s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0309s for 81920 events => throughput is 2.65E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.808476e+06 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.104718e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452343426109] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.8589s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8551s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.55E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.6578s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6457s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0066s -*** (3-cuda) Compare MADEVENT_CUDA 
x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cuda (0.30449452343426109) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (0.30449452343426120) and hip (0.30449452343426120) differ by less than 3E-14 (0.0) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,9 +405,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -569,59 +415,57 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 2.0146s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0062s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 81920 events => throughput is 1.04E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.5016s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 81920 events => throughput is 9.93E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0064s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cuda (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and hip (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.114496e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.960174e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.411767e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.342641e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.418677e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.313778e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.644247e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.335231e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 
128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.424249e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.321322e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.758689e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.755462e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.384808e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.202913e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.177573e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.640874e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 6af3b55835..b057d3eb24 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-18_17:34:55 +DATE: 2024-09-18_23:42:57 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.6834s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6749s - [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.63E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5186s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5125s + [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.35E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4230s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4146s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.72E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3303s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3242s + [COUNTERS] Fortran MEs ( 1 ) : 0.0060s for 8192 events => throughput is 1.36E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6556s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5716s - [COUNTERS] Fortran MEs ( 1 ) : 0.0840s for 81920 events => throughput is 9.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2369s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1766s + [COUNTERS] Fortran MEs ( 1 ) : 0.0603s for 81920 events => throughput is 1.36E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446496609361] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446601800423] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4261s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4173s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.65E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3492s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3427s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.29E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446496609361) differ by less than 4E-4 (1.9201714018812766e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446601800423) differ by less than 4E-4 (1.8856252759213987e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305007079218] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747305123565710] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6758s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5913s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0842s for 81920 events => throughput is 9.73E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.2649s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2015s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0633s for 81920 events => throughput is 1.29E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305007079218) differ by less than 4E-4 (1.858740792393121e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305123565710) differ by less than 4E-4 (1.8208556928911435e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.013895e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.384737e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.014072e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.394396e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446369440458] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446481959741] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4170s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4141s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.96E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3438s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3416s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.84E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446369440458) differ by less than 4E-4 (1.961935339744869e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446481959741) differ by less than 4E-4 (1.924982528933583e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747304961041555] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747305120129920] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6056s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5782s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0272s for 81920 events => throughput is 3.01E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.2097s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1894s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0202s for 81920 events => throughput is 4.06E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747304961041555) differ by less than 4E-4 (1.8737136997515336e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305120129920) differ by less than 4E-4 (1.8219731212631984e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.210079e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.493283e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.272367e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.612914e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446707997274] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4185s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4165s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.66E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3567s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.72E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446614968528) differ by less than 4E-4 (1.881300697448296e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446707997274) differ by less than 4E-4 (1.8507488352970114e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747305200358782] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6070s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5879s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0188s for 81920 events => throughput is 4.35E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.2303s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2164s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0137s for 81920 events => throughput is 5.96E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ by less than 4E-4 (1.839838263961724e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305200358782) differ by less than 4E-4 (1.7958801523665358e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.874017e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.300612e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' -DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 - [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4142s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4122s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.54E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446614968528) differ by less than 4E-4 (1.881300697448296e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 - [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.5999s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5818s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0179s for 81920 events => throughput is 4.59E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ by less than 4E-4 (1.839838263961724e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.302533e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.770097e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.611044e+06 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' -DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449447031649013] fbridge_mode=1 - [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4176s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4150s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.63E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449447031649013) differ by less than 4E-4 (1.744457354124762e-07) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.178956e+06 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305508949557] fbridge_mode=1 - [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6057s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5839s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 81920 events => throughput is 3.82E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305508949557) differ by less than 4E-4 (1.6955166515231213e-07) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.359914e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.606033e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449447352014630] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446257236112] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.8553s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8518s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.72E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 
0.0005s + [COUNTERS] PROGRAM TOTAL : 0.6561s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6446s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0060s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cuda (0.30449447352014630) differ by less than 4E-4 (1.639245078566276e-07) +OK! xsec from fortran (0.30449452343426120) and hip (0.30449446257236112) differ by less than 4E-4 (1.998784719958735e-07) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305761315818] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747304644712603] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 2.0187s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0105s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 81920 events => throughput is 1.06E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.5658s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5518s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 81920 events => throughput is 1.04E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cuda (0.30747305761315818) differ by less than 4E-4 (1.6134391445099538e-07) +OK! xsec from fortran (0.30747310722207288) and hip (0.30747304644712603) differ by less than 4E-4 (1.9765939007765354e-07) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.132925e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.558902e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.463748e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.658289e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.452376e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.011473e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.074682e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.555927e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 
128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.432547e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.215146e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.996097e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.540892e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.096714e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.596411e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.649902e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.881194e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index d3c2ed78ae..bd5c094abb 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' 
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-18_17:35:21 +DATE: 2024-09-18_23:43:15 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.6928s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6841s - [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.39E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5137s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5079s + [COUNTERS] Fortran MEs ( 1 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4220s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4135s - [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3338s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3277s + [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.35E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6593s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5746s - [COUNTERS] Fortran MEs ( 1 ) : 0.0847s for 81920 events => throughput is 9.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.2345s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1750s + [COUNTERS] Fortran MEs ( 1 ) : 0.0595s for 81920 events => throughput is 1.38E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449453160892020] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4261s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4172s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.60E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3505s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892032) differ by less than 2E-4 (2.6846654010981297e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892020) differ by less than 2E-4 (2.6846653566892087e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747311535940242] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6599s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5747s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0848s for 81920 events => throughput is 9.66E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.2694s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1945s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0747s for 81920 events => throughput is 1.10E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ by less than 2E-4 (2.6465174718381945e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940242) differ by less than 2E-4 (2.6465174718381945e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.812593e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.150780e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.833045e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.165133e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449453160892020] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4216s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4168s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.83E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3483s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3445s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.31E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892032) differ by less than 2E-4 (2.6846654010981297e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892020) differ by less than 2E-4 (2.6846653566892087e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747311535940242] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6345s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5889s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0453s for 81920 events => throughput is 1.81E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.2237s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1890s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0345s for 81920 events => throughput is 2.37E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ by less than 2E-4 (2.6465174718381945e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940242) differ by less than 2E-4 (2.6465174718381945e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.934567e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.685612e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.973267e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.739132e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449453251780906] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4198s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4167s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4226s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4204s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.90E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453251780906) differ by less than 2E-4 (2.98315638858071e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747311628550072] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6044s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5763s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 81920 events => throughput is 2.95E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.2074s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1871s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 81920 events => throughput is 4.08E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311628550072) differ by less than 2E-4 (2.947714006218405e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.181731e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.488610e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' -DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 - [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4215s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4184s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.97E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 - [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6145s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5872s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0269s for 81920 events => throughput is 3.04E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.215852e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.508719e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.572337e+06 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' -DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 - [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4203s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4167s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.60E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.662207e+06 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 - [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6110s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5806s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0301s for 81920 events => throughput is 2.73E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.871798e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.069203e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452360186230] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449452360186241] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.8557s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8521s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 
0.0005s + [COUNTERS] PROGRAM TOTAL : 0.6741s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6621s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.49E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0065s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cuda (0.30449452360186230) differ by less than 2E-4 (5.504239286580059e-10) +OK! xsec from fortran (0.30449452343426120) and hip (0.30449452360186241) differ by less than 2E-4 (5.504243727472158e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310720557364] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310720557375] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 2.0158s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0075s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 81920 events => throughput is 1.05E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 1.5096s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4940s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0086s for 81920 events => throughput is 9.48E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0070s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cuda (0.30747310720557364) differ by less than 2E-4 (5.366074251611508e-11) +OK! xsec from fortran (0.30747310722207288) and hip (0.30747310720557375) differ by less than 2E-4 (5.366040944920769e-11) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.203370e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.570605e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.488049e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.619962e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.465715e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.249341e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.776763e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.345507e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 
128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.434433e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.257709e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.877036e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.785764e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.445366e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.388728e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.175182e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.652732e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index e14403d083..a40b232bb0 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-18_17:33:04 +DATE: 2024-09-18_23:41:43 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.8478s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8053s - [COUNTERS] Fortran MEs ( 1 ) : 0.0425s for 8192 events => throughput is 1.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8394s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8085s + [COUNTERS] Fortran MEs ( 1 ) : 0.0309s for 8192 events => throughput is 2.65E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4470s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4043s - [COUNTERS] Fortran MEs ( 1 ) : 0.0427s for 8192 events => throughput is 1.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3585s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3262s + [COUNTERS] Fortran MEs ( 1 ) : 0.0323s for 8192 events => throughput is 2.53E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9546s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5320s - [COUNTERS] Fortran MEs ( 1 ) : 0.4226s for 81920 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4874s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1704s + [COUNTERS] Fortran MEs ( 1 ) : 0.3171s for 81920 events => throughput is 2.58E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846964] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4564s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4114s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0447s for 8192 events => throughput is 1.83E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3877s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3519s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0356s for 8192 events => throughput is 2.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846964) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (44.641911695846950) and cpp (44.641911695846950) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444664] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9966s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5483s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4479s for 81920 events => throughput is 1.83E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.5327s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3526s for 81920 events => throughput is 2.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444664) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (44.473264592444679) and cpp (44.473264592444679) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.854967e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.346198e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.848854e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.355296e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641911695846943] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4357s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4103s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 8192 events => throughput is 3.26E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3675s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3462s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846957) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.641911695846950) and cpp (44.641911695846943) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.8082s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5526s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2552s for 81920 events => throughput is 3.21E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.3846s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1757s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2088s for 81920 events => throughput is 3.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.473264592444679) and cpp (44.473264592444679) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.238674e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.976615e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.323356e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.995377e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641911695846943] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4251s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4091s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.22E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3623s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3498s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.63E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (44.641911695846950) and cpp (44.641911695846943) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,9 +319,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -329,110 +329,36 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6875s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5321s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1550s for 81920 events => throughput is 5.28E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.5417s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4046s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1369s for 81920 events => throughput is 5.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (44.473264592444679) and cpp (44.473264592444679) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.205465e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.876235e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.341963e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4231s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4089s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.90E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.506247e+05 ) sec^-1 -OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 - [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6853s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5431s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1419s for 81920 events => throughput is 5.77E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.704465e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.807181e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,9 +370,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -454,20 +380,20 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4336s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4101s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.55E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 
0.6910s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6778s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.30E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0069s -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (44.641911695846950) and hip (44.641911695846950) differ by less than 3E-14 (0.0) -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -479,89 +405,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 - [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7690s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5437s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2250s for 81920 events => throughput is 3.64E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ by less than 3E-14 (0.0) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.577527e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.629127e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! 
Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8492s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8454s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.63E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.641911695846957) and cuda (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -569,59 +415,57 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9952s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9855s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 81920 events => throughput is 9.15E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 1.4598s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4351s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0174s for 81920 events => throughput is 4.71E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0072s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cuda (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (44.473264592444679) and hip (44.473264592444679) differ by less than 3E-14 (0.0) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.949285e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.361469e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.317105e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.393659e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.829932e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.804930e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.548750e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.565191e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge 
*** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.793745e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.834506e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.913836e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.303015e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.818636e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.811147e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.654381e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.075363e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index a972218890..9633ce81ed 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-18_17:33:33 +DATE: 2024-09-18_23:42:02 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.8346s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7928s - [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5786s + [COUNTERS] Fortran MEs ( 1 ) : 0.0293s for 8192 events => throughput is 2.80E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4474s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4056s - [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3537s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3240s + [COUNTERS] Fortran MEs ( 1 ) : 0.0298s for 8192 events => throughput is 2.75E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9534s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5332s - [COUNTERS] Fortran MEs ( 1 ) : 0.4202s for 81920 events => throughput is 1.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4902s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1703s + [COUNTERS] Fortran MEs ( 1 ) : 0.3199s for 81920 events => throughput is 2.56E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641906072918047] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641905397892330] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4536s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4103s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0430s for 8192 events => throughput is 1.91E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3817s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3498s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 8192 events => throughput is 2.58E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641906072918047) differ by less than 4E-4 (1.2595627507661078e-07) +OK! xsec from fortran (44.641911695846950) and cpp (44.641905397892330) differ by less than 4E-4 (1.4107717127842534e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473258789404959] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473258075185306] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9508s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5325s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4180s for 81920 events => throughput is 1.96E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.5058s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1902s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3155s for 81920 events => throughput is 2.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473258789404959) differ by less than 4E-4 (1.3048378089131063e-07) +OK! xsec from fortran (44.473264592444679) and cpp (44.473258075185306) differ by less than 4E-4 (1.465433093761348e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.983462e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.618196e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.977072e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.656559e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641902189470080] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641902617887730] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4272s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4098s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.77E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3766s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3616s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0148s for 8192 events => throughput is 5.53E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641902189470080) differ by less than 4E-4 (2.1294735186305758e-07) +OK! xsec from fortran (44.641911695846950) and cpp (44.641902617887730) differ by less than 4E-4 (2.0335059314202653e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473255074265531] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473255619824656] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7014s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5300s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1712s for 81920 events => throughput is 4.79E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.3344s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1473s for 81920 events => throughput is 5.56E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473255074265531) differ by less than 4E-4 (2.1402024852346102e-07) +OK! xsec from fortran (44.473264592444679) and cpp (44.473255619824656) differ by less than 4E-4 (2.0175312298587045e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.686204e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.888623e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.729824e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.714344e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641902771385062] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4171s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4080s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 8192 events => throughput is 9.21E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3557s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3482s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641902360436738) differ by less than 4E-4 (2.0911761755559866e-07) +OK! xsec from fortran (44.641911695846950) and cpp (44.641902771385062) differ by less than 4E-4 (1.9991218003223565e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473255186065366] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6247s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5346s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0899s for 81920 events => throughput is 9.11E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.2562s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1834s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0726s for 81920 events => throughput is 1.13E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ by less than 4E-4 (2.240397288799656e-07) +OK! xsec from fortran (44.473264592444679) and cpp (44.473255186065366) differ by less than 4E-4 (2.1150638251921094e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.731978e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.141403e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4189s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4099s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 8192 events => throughput is 9.27E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.641911695846957) and cpp (44.641902360436738) differ by less than 4E-4 (2.0911761755559866e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1 - [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6246s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5387s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0857s for 81920 events => throughput is 9.56E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ by less than 4E-4 (2.240397288799656e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.618487e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.181203e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.810592e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641906399820272] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4241s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4113s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.53E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.641911695846957) and cpp (44.641906399820272) differ by less than 4E-4 (1.1863351012664225e-07) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.191155e+06 ) sec^-1 -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473258854390501] fbridge_mode=1 - [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6706s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5504s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1199s for 81920 events => throughput is 6.83E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473258854390501) differ by less than 4E-4 (1.2902255375202287e-07) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.869027e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.899392e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641910992291372] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641905467548966] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8527s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8491s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + 
[COUNTERS] PROGRAM TOTAL : 0.6481s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6360s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 8192 events => throughput is 1.59E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0070s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cuda (44.641910992291372) differ by less than 4E-4 (1.575997887748315e-08) +OK! xsec from fortran (44.641911695846950) and hip (44.641905467548966) differ by less than 4E-4 (1.3951682953372568e-07) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473262664842089] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473257658055729] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9894s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9808s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 81920 events => throughput is 1.03E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 1.5008s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4844s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 81920 events => throughput is 9.33E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0076s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cuda (44.473262664842089) differ by less than 4E-4 (4.334295222729878e-08) +OK! xsec from fortran (44.473264592444679) and hip (44.473257658055729) differ by less than 4E-4 (1.5592264279717938e-07) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.991468e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.671756e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.344514e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.663884e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.881682e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.230603e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.350971e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.789505e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge 
*** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.826185e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.128565e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.350870e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.600395e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.507679e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.735657e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.018982e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.320347e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index f3cbf0c54f..1f9016e379 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-18_17:34:00 +DATE: 2024-09-18_23:42:20 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.8326s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7908s - [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6327s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6008s + [COUNTERS] Fortran MEs ( 1 ) : 0.0318s for 8192 events => throughput is 2.57E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4514s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4081s - [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3618s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3299s + [COUNTERS] Fortran MEs ( 1 ) : 0.0319s for 8192 events => throughput is 2.56E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9646s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5417s - [COUNTERS] Fortran MEs ( 1 ) : 0.4230s for 81920 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7631s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4052s + [COUNTERS] Fortran MEs ( 1 ) : 0.3579s for 81920 events => throughput is 2.29E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641912938404211] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4612s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4163s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3770s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3410s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0358s for 8192 events => throughput is 2.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641912938404218) differ by less than 2E-4 (2.783387209603916e-08) +OK! xsec from fortran (44.641911695846950) and cpp (44.641912938404211) differ by less than 2E-4 (2.783387209603916e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,9 +159,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -169,28 +169,28 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9868s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5362s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4502s for 81920 events => throughput is 1.82E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.5472s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1902s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3568s for 81920 events => throughput is 2.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) +OK! xsec from fortran (44.473264592444679) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.844411e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.316075e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.856447e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.332670e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641912938404225] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4369s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4119s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 8192 events => throughput is 3.31E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3652s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3440s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 8192 events => throughput is 3.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641912938404218) differ by less than 2E-4 (2.783387209603916e-08) +OK! xsec from fortran (44.641911695846950) and cpp (44.641912938404225) differ by less than 2E-4 (2.7833872318083763e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473265850735238] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7808s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5352s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2452s for 81920 events => throughput is 3.34E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.3961s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2091s for 81920 events => throughput is 3.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) +OK! xsec from fortran (44.473264592444679) and cpp (44.473265850735238) differ by less than 2E-4 (2.8293190679207214e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.286947e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.994557e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.353817e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.010766e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641912966309015] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4259s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4101s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3614s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3491s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0121s for 8192 events => throughput is 6.80E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) +OK! xsec from fortran (44.641911695846950) and cpp (44.641912966309015) differ by less than 2E-4 (2.8458952971988083e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,120 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473265882025295] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6932s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5410s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1518s for 81920 events => throughput is 5.39E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.2345s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1223s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1120s for 81920 events => throughput is 7.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) +OK! xsec from fortran (44.473264592444679) and cpp (44.473265882025295) differ by less than 2E-4 (2.899676077028346e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.355089e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.182707e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.376897e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4277s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4132s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0141s for 8192 events => throughput is 5.80E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.929158e+05 ) sec^-1 -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 - [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6947s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5520s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1424s for 81920 events => throughput is 5.75E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.878311e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.966522e+05 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,89 +370,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4311s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4086s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! 
Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 - [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7576s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5385s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2187s for 81920 events => throughput is 3.74E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.691521e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.773666e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -534,20 +380,20 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911674225568] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8481s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8441s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.54E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.6162s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6023s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0081s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cuda (44.641911674225568) differ by less than 2E-4 (4.843293543999039e-10) +OK! xsec from fortran (44.641911695846950) and hip (44.641911674225568) differ by less than 2E-4 (4.843292433776014e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,9 +405,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -569,59 +415,57 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264587763374] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9812s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9714s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 81920 events => throughput is 9.08E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 1.3998s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3749s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0173s for 81920 events => throughput is 4.73E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0077s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cuda (44.473264587763374) differ by less than 2E-4 (1.0526091109852587e-10) +OK! 
xsec from fortran (44.473264592444679) and hip (44.473264587763374) differ by less than 2E-4 (1.0526113314313079e-10) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.958191e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.356779e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.401140e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.429203e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.815576e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.784466e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.499893e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.529124e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.820308e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.775937e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.845220e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.224497e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.813891e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.763929e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.729165e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.982810e+06 ) sec^-1 TEST COMPLETED From c4164febc8fface4eafdaf0d6c19755f79fdb80c Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 19 Sep 2024 10:51:11 +0200 Subject: [PATCH 63/76] [amd] ** COMPLETE AMD ** revert to itscrd90 logs for tput/tmad tests Revert "[amd] rerun 30 tmad tests on LUMI against AMD GPUs - all as expected (heft fail #833, skip ggttggg #933, gqttq crash #806)" This reverts commit 458b834efe40c216461f15a38e7943716e6edc83. Revert "[amd] rerun 96 tput builds and tests on LUMI worker node (small-g 72h) - all as expected" This reverts commit 0c947d1d519dd12d5f54e260a846f34ddf2ae76e. --- .../log_eemumu_mad_d_inl0_hrd0.txt | 442 +++++++++++----- .../log_eemumu_mad_f_inl0_hrd0.txt | 464 ++++++++++------ .../log_eemumu_mad_m_inl0_hrd0.txt | 460 ++++++++++------ .../log_ggtt_mad_d_inl0_hrd0.txt | 452 ++++++++++------ .../log_ggtt_mad_f_inl0_hrd0.txt | 458 ++++++++++------ .../log_ggtt_mad_m_inl0_hrd0.txt | 454 ++++++++++------ .../log_ggttg_mad_d_inl0_hrd0.txt | 462 ++++++++++------ .../log_ggttg_mad_f_inl0_hrd0.txt | 462 ++++++++++------ .../log_ggttg_mad_m_inl0_hrd0.txt | 464 ++++++++++------ .../log_ggttgg_mad_d_inl0_hrd0.txt | 460 ++++++++++------ .../log_ggttgg_mad_f_inl0_hrd0.txt | 463 ++++++++++------ .../log_ggttgg_mad_m_inl0_hrd0.txt | 462 ++++++++++------ .../log_ggttggg_mad_d_inl0_hrd0.txt | 488 +++++++++++++---- .../log_ggttggg_mad_f_inl0_hrd0.txt | 492 +++++++++++++---- .../log_ggttggg_mad_m_inl0_hrd0.txt | 488 +++++++++++++---- .../log_gqttq_mad_d_inl0_hrd0.txt | 500 +++++++++++++----- .../log_gqttq_mad_f_inl0_hrd0.txt | 498 ++++++++++++----- 
.../log_gqttq_mad_m_inl0_hrd0.txt | 496 ++++++++++++----- .../log_heftggbb_mad_d_inl0_hrd0.txt | 462 ++++++++++------ .../log_heftggbb_mad_f_inl0_hrd0.txt | 98 ++-- .../log_heftggbb_mad_m_inl0_hrd0.txt | 472 +++++++++++------ .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 462 ++++++++++------ .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 462 ++++++++++------ .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 462 ++++++++++------ .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 446 +++++++++++----- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 456 ++++++++++------ .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 456 ++++++++++------ .../log_susyggtt_mad_d_inl0_hrd0.txt | 456 ++++++++++------ .../log_susyggtt_mad_f_inl0_hrd0.txt | 462 ++++++++++------ .../log_susyggtt_mad_m_inl0_hrd0.txt | 458 ++++++++++------ .../log_eemumu_mad_d_inl0_hrd0.txt | 258 +++++---- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 276 ++++++---- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 244 ++++++--- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 261 +++++---- .../log_eemumu_mad_d_inl0_hrd1.txt | 254 +++++---- .../log_eemumu_mad_d_inl1_hrd0.txt | 258 +++++---- .../log_eemumu_mad_d_inl1_hrd1.txt | 258 +++++---- .../log_eemumu_mad_f_inl0_hrd0.txt | 268 ++++++---- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 284 ++++++---- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 254 +++++---- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 269 ++++++---- .../log_eemumu_mad_f_inl0_hrd1.txt | 268 ++++++---- .../log_eemumu_mad_f_inl1_hrd0.txt | 268 ++++++---- .../log_eemumu_mad_f_inl1_hrd1.txt | 268 ++++++---- .../log_eemumu_mad_m_inl0_hrd0.txt | 254 +++++---- .../log_eemumu_mad_m_inl0_hrd1.txt | 254 +++++---- .../log_ggtt_mad_d_inl0_hrd0.txt | 254 +++++---- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 272 ++++++---- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 240 ++++++--- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 257 +++++---- .../log_ggtt_mad_d_inl0_hrd1.txt | 254 +++++---- .../log_ggtt_mad_d_inl1_hrd0.txt | 254 +++++---- .../log_ggtt_mad_d_inl1_hrd1.txt | 
254 +++++---- .../log_ggtt_mad_f_inl0_hrd0.txt | 272 ++++++---- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 290 ++++++---- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 264 +++++---- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 275 ++++++---- .../log_ggtt_mad_f_inl0_hrd1.txt | 272 ++++++---- .../log_ggtt_mad_f_inl1_hrd0.txt | 272 ++++++---- .../log_ggtt_mad_f_inl1_hrd1.txt | 272 ++++++---- .../log_ggtt_mad_m_inl0_hrd0.txt | 258 +++++---- .../log_ggtt_mad_m_inl0_hrd1.txt | 258 +++++---- .../log_ggttg_mad_d_inl0_hrd0.txt | 293 ++++++---- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 315 ++++++----- .../log_ggttg_mad_d_inl0_hrd1.txt | 293 ++++++---- .../log_ggttg_mad_f_inl0_hrd0.txt | 301 +++++++---- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 323 ++++++----- .../log_ggttg_mad_f_inl0_hrd1.txt | 301 +++++++---- .../log_ggttg_mad_m_inl0_hrd0.txt | 281 ++++++---- .../log_ggttg_mad_m_inl0_hrd1.txt | 281 ++++++---- .../log_ggttgg_mad_d_inl0_hrd0.txt | 285 ++++++---- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 307 +++++++---- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 269 ++++++---- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 290 ++++++---- .../log_ggttgg_mad_d_inl0_hrd1.txt | 285 ++++++---- .../log_ggttgg_mad_d_inl1_hrd0.txt | 289 ++++++---- .../log_ggttgg_mad_d_inl1_hrd1.txt | 293 ++++++---- .../log_ggttgg_mad_f_inl0_hrd0.txt | 301 +++++++---- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 323 ++++++----- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 295 +++++++---- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 306 ++++++----- .../log_ggttgg_mad_f_inl0_hrd1.txt | 299 +++++++---- .../log_ggttgg_mad_f_inl1_hrd0.txt | 297 +++++++---- .../log_ggttgg_mad_f_inl1_hrd1.txt | 297 +++++++---- .../log_ggttgg_mad_m_inl0_hrd0.txt | 281 ++++++---- .../log_ggttgg_mad_m_inl0_hrd1.txt | 281 ++++++---- .../log_ggttggg_mad_d_inl0_hrd0.txt | 237 ++++++--- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 261 ++++++--- .../log_ggttggg_mad_d_inl0_hrd1.txt | 237 ++++++--- .../log_ggttggg_mad_f_inl0_hrd0.txt | 251 
++++++--- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 275 +++++++--- .../log_ggttggg_mad_f_inl0_hrd1.txt | 251 ++++++--- .../log_ggttggg_mad_m_inl0_hrd0.txt | 233 +++++--- .../log_ggttggg_mad_m_inl0_hrd1.txt | 233 +++++--- .../log_gqttq_mad_d_inl0_hrd0.txt | 269 ++++++++-- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 289 ++++++++-- .../log_gqttq_mad_d_inl0_hrd1.txt | 269 ++++++++-- .../log_gqttq_mad_f_inl0_hrd0.txt | 269 ++++++++-- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 289 ++++++++-- .../log_gqttq_mad_f_inl0_hrd1.txt | 269 ++++++++-- .../log_gqttq_mad_m_inl0_hrd0.txt | 269 ++++++++-- .../log_gqttq_mad_m_inl0_hrd1.txt | 269 ++++++++-- .../log_heftggbb_mad_d_inl0_hrd0.txt | 254 +++++---- .../log_heftggbb_mad_d_inl0_hrd1.txt | 254 +++++---- .../log_heftggbb_mad_f_inl0_hrd0.txt | 270 ++++++---- .../log_heftggbb_mad_f_inl0_hrd1.txt | 272 ++++++---- .../log_heftggbb_mad_m_inl0_hrd0.txt | 252 ++++++--- .../log_heftggbb_mad_m_inl0_hrd1.txt | 252 ++++++--- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 281 ++++++---- .../log_smeftggtttt_mad_d_inl0_hrd1.txt | 281 ++++++---- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 301 +++++++---- .../log_smeftggtttt_mad_f_inl0_hrd1.txt | 301 +++++++---- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 281 ++++++---- .../log_smeftggtttt_mad_m_inl0_hrd1.txt | 281 ++++++---- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 250 +++++---- .../log_susyggt1t1_mad_d_inl0_hrd1.txt | 250 +++++---- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 262 +++++---- .../log_susyggt1t1_mad_f_inl0_hrd1.txt | 262 +++++---- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 254 +++++---- .../log_susyggt1t1_mad_m_inl0_hrd1.txt | 254 +++++---- .../log_susyggtt_mad_d_inl0_hrd0.txt | 258 +++++---- .../log_susyggtt_mad_d_inl0_hrd1.txt | 254 +++++---- .../log_susyggtt_mad_f_inl0_hrd0.txt | 270 ++++++---- .../log_susyggtt_mad_f_inl0_hrd1.txt | 270 ++++++---- .../log_susyggtt_mad_m_inl0_hrd0.txt | 254 +++++---- .../log_susyggtt_mad_m_inl0_hrd1.txt | 254 +++++---- 126 files changed, 26531 insertions(+), 
13128 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index d492b5ffc7..a32be077f9 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-18_21:09:23 +DATE: 2024-09-18_13:40:30 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.5561s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5500s - [COUNTERS] Fortran MEs ( 1 ) : 0.0062s for 8192 events => throughput is 1.33E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7474s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7399s + [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1514s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1453s - [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2197s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2120s + [COUNTERS] Fortran MEs ( 1 ) : 0.0076s for 8192 events => throughput is 1.07E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/v [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3922s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3318s - [COUNTERS] Fortran MEs ( 1 ) : 0.0604s for 81920 events => throughput is 1.36E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7224s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6476s + [COUNTERS] Fortran MEs ( 1 ) : 0.0748s for 81920 events => throughput is 1.10E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,9 +124,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,14 +134,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1621s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1557s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.31E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2136s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0071s for 8192 events => throughput is 1.15E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,9 +159,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -169,10 +169,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3923s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3308s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0614s for 81920 events => throughput is 1.33E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.7334s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6611s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0720s for 81920 events => throughput is 1.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -183,14 +183,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.392236e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.150298e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.408701e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.170213e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,14 +214,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2544s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.08E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2169s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2122s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.87E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,9 +239,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3730s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3340s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0388s for 81920 events => throughput is 2.11E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.7002s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6558s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0441s for 81920 events => throughput is 1.86E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -263,14 +263,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.211611e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.910014e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.233261e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.998657e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,9 +284,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -294,14 +294,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1501s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1473s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 2.98E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2169s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.40E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,9 +319,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3615s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3344s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 81920 events => throughput is 3.03E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6805s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6468s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0335s for 81920 events => throughput is 2.45E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -343,22 +343,96 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.235818e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.599120e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.340621e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.638604e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2150s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2114s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.46E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6827s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6499s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0325s for 81920 events => throughput is 2.52E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002) differ by less than 3E-14 (2.220446049250313e-16) -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.664038e+06 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.739981e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +444,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.4669s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4550s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0056s for 8192 events => throughput is 1.46E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0063s + [COUNTERS] PROGRAM TOTAL : 0.2192s + [COUNTERS] 
Fortran Overhead ( 0 ) : 0.2148s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 2.02E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and hip (9.2432789448173971E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +479,149 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6451s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6280s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 81920 events => throughput is 7.72E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0065s + [COUNTERS] PROGRAM TOTAL : 0.6880s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6476s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0401s for 81920 events => throughput is 2.05E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519892E-002) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.061679e+06 ) sec^-1 -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.165474e+06 ) sec^-1 -OK! xsec from fortran (9.1711103909519892E-002) and hip (9.1711103909519892E-002) differ by less than 3E-14 (0.0) +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.6554s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6518s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 1.0937s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0854s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 81920 events => throughput is 1.06E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103909519892E-002) differ by less than 3E-14 (0.0) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.484588e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.180467e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.562941e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.444487e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.728023e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.131686e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.985276e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.605423e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = 
SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.731572e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.162553e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.884754e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.757987e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.728413e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.185886e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.564490e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.069069e+08 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 960f3f0cd1..d760c23b34 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-18_21:09:36 +DATE: 2024-09-18_13:40:49 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.5256s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5195s - [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7432s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7356s + [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1528s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1467s - [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2222s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2147s + [COUNTERS] Fortran MEs ( 1 ) : 0.0076s for 8192 events => throughput is 1.08E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/v [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3857s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3269s - [COUNTERS] Fortran MEs ( 1 ) : 0.0588s for 81920 events => throughput is 1.39E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7385s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6605s + [COUNTERS] Fortran MEs ( 1 ) : 0.0780s for 81920 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432776035199060E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432777382586498E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1557s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 8192 events => throughput is 1.57E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2258s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2183s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.13E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432776035199060E-002) differ by less than 4E-4 (1.4511057155885965e-07) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432777382586498E-002) differ by less than 4E-4 (1.305336294610271e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711090687154856E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711091925143637E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3852s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3333s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0518s for 81920 events => throughput is 1.58E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.7135s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6453s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0680s for 81920 events => throughput is 1.20E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711090687154856E-002) differ by less than 4E-4 (1.4417409099909406e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711091925143637E-002) differ by less than 4E-4 (1.3067530257870885e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.768544e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.221041e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.742150e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.228624e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432793908398633E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432774839452045E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1526s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1502s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.57E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2147s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2117s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.95E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432793908398633E-002) differ by less than 4E-4 (4.8253706141920816e-08) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774839452045E-002) differ by less than 4E-4 (1.5804696607002455e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711108423277371E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711089416628339E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3541s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3315s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0225s for 81920 events => throughput is 3.64E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6780s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6503s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0275s for 81920 events => throughput is 2.98E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711108423277371E-002) differ by less than 4E-4 (4.921713170347175e-08) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089416628339E-002) differ by less than 4E-4 (1.5802766439865223e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.922218e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.101743e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.998544e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.221746e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432793820194981E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1531s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1509s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.98E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2192s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2165s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.25E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432793820194981E-002) differ by less than 4E-4 (4.729945990433748e-08) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774915924193E-002) differ by less than 4E-4 (1.5721963908532643e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711108407854763E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3555s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3354s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0200s for 81920 events => throughput is 4.10E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6719s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6467s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0250s for 81920 events => throughput is 3.28E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711108407854763E-002) differ by less than 4E-4 (4.904896666602099e-08) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002) differ by less than 4E-4 (1.5762502958427405e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.393817e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.474277e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.584047e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2174s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2146s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.25E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774915924193E-002) differ by less than 4E-4 (1.5721963908532643e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6748s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6505s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0241s for 81920 events => throughput is 3.40E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002) differ by less than 4E-4 (1.5762502958427405e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.456987e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.548344e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.708350e+06 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432778556608516E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2238s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2208s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.95E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432778556608516E-002) differ by less than 4E-4 (1.1783227071848756e-07) -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711093118690828E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6837s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6578s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0257s for 81920 events => throughput is 3.19E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (9.1711103909519892E-002) and cpp (9.1711093118690828E-002) differ by less than 4E-4 (1.1766109664357316e-07) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.378249e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.571882e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432778459280288E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432780016531851E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.5712s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5600s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0058s + [COUNTERS] PROGRAM TOTAL : 0.6559s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6524s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.77E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and hip (9.2432778459280288E-002) differ by less than 4E-4 (1.1888523265835005e-07) +OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432780016531851E-002) differ by less than 4E-4 (1.0203783951112655e-07) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711093172690286E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711094767039689E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6557s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 81920 events => throughput is 9.68E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0060s + [COUNTERS] PROGRAM TOTAL : 1.0956s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0874s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 81920 events => throughput is 1.07E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (9.1711103909519892E-002) and hip (9.1711093172690286E-002) differ by less than 4E-4 (1.1707229707891287e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711094767039689E-002) differ by less than 4E-4 (9.968782199720749e-08) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.584467e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.223914e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.552819e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.489581e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.782998e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 5.046619e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.597196e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.917172e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.758103e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.064257e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.537742e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.895930e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.157651e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.656385e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = 
SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.529571e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.636564e+08 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 6cb007d911..3678e8e364 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum + +make USEBUILDDIR=1 BACKEND=cuda + -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-18_21:09:48 +DATE: 2024-09-18_13:41:08 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.5285s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5224s - [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7466s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7391s + [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1532s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1471s - [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2178s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2101s + [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/v [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3961s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3355s - [COUNTERS] Fortran MEs ( 1 ) : 0.0605s for 81920 events => throughput is 1.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7260s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6506s + [COUNTERS] Fortran MEs ( 1 ) : 0.0754s for 81920 events => throughput is 1.09E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,9 +124,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,14 +134,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1552s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1490s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 8192 events => throughput is 1.32E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2127s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.13E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448297203334505e-11) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448308305564751e-11) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103904317942E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3902s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3300s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0601s for 81920 events => throughput is 1.36E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.7363s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6616s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0744s for 81920 events => throughput is 1.10E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317942E-002) differ by less than 2E-4 (5.672107228349432e-11) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002) differ by less than 2E-4 (5.6721183305796785e-11) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.411192e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.133764e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.482269e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.147681e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,14 +214,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1505s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1468s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.29E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2195s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2148s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448297203334505e-11) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448308305564751e-11) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103904317942E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3719s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3339s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0379s for 81920 events => throughput is 2.16E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6934s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6496s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0435s for 81920 events => throughput is 1.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317942E-002) differ by less than 2E-4 (5.672107228349432e-11) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002) differ by less than 2E-4 (5.6721183305796785e-11) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.275066e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.996644e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.326149e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.048925e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789444494401E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1522s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1494s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.96E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2177s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2140s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.44E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789444494401E-002) differ by less than 2E-4 (3.980804574865715e-11) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103899063479E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.5429s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5088s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0339s for 81920 events => throughput is 2.42E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6893s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6561s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0329s for 81920 events => throughput is 2.49E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063479E-002) differ by less than 2E-4 (1.1401468658078784e-10) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.197053e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.590922e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.665063e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2169s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2133s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.50E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6867s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6530s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0334s for 81920 events => throughput is 2.45E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.604540e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.283314e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.589532e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2176s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2134s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6977s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6595s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0378s for 81920 events => throughput is 2.17E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.201898e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.266955e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789437826984E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432789437826970E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.4473s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4354s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 8192 events => throughput is 1.43E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s + [COUNTERS] PROGRAM TOTAL : 
0.6541s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6503s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and hip (9.2432789437826984E-002) differ by less than 2E-4 (1.1194067894848558e-10) +OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789437826970E-002) differ by less than 2E-4 (1.1194101201539297e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103901050417E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6442s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6276s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0104s for 81920 events => throughput is 7.91E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s + [COUNTERS] PROGRAM TOTAL : 1.0910s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0820s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 81920 events => throughput is 9.79E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and hip (9.1711103901050417E-002) differ by less than 2E-4 (9.234946141134515e-11) +OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103901050417E-002) differ by less than 2E-4 (9.234946141134515e-11) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.524793e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.081337e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.562476e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.286137e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.751283e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.251289e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.037405e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.774363e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = 
SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.765527e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.280291e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.896806e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.840047e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.709964e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.258897e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.586592e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.124478e+08 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 3f2d9bdd43..21d2f45edf 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-18_21:10:00 +DATE: 2024-09-18_13:41:27 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/vala [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.7231s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6912s - [COUNTERS] Fortran MEs ( 1 ) : 0.0319s for 8192 events => throughput is 2.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8485s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8070s + [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/vala [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3495s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3203s - [COUNTERS] Fortran MEs ( 1 ) : 0.0292s for 8192 events => throughput is 2.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4498s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4077s + [COUNTERS] Fortran MEs ( 1 ) : 0.0420s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268157] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.5007s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1830s - [COUNTERS] Fortran MEs ( 1 ) : 0.3177s for 81920 events => throughput is 2.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9562s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5349s + [COUNTERS] Fortran MEs ( 1 ) : 0.4212s for 81920 events => throughput is 1.94E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611968034155] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3723s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3367s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0354s for 8192 events => throughput is 2.32E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4516s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4062s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0450s for 8192 events => throughput is 1.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034155) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268150] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.5267s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1760s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3505s for 81920 events => throughput is 2.34E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.9866s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5393s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4468s for 81920 events => throughput is 1.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144596232268150) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.370663e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.851914e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.352510e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.872591e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611968034155] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3589s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3376s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 8192 events => throughput is 3.89E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4375s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4102s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0269s for 8192 events => throughput is 3.04E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034155) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268164] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.4468s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2378s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2088s for 81920 events => throughput is 3.92E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7905s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5386s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2515s for 81920 events => throughput is 3.26E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144596232268164) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.992808e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.305635e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.008058e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.371218e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,9 +284,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3458s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3333s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.66E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4200s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4039s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0158s for 8192 events => throughput is 5.19E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268178] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.2924s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1706s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1216s for 81920 events => throughput is 6.73E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7022s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5439s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1579s for 81920 events => throughput is 5.19E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144596232268178) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.897510e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.172913e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.943522e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.264385e+05 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4192s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4041s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.59E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.6806s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5367s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1435s for 81920 events => throughput is 5.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.786323e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.850142e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138611968034169] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4275s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4043s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034169) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.7739s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5476s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2259s for 81920 events => throughput is 3.63E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.522776e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.620788e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,9 +524,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! 
Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -380,20 +534,20 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034176] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.6227s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6088s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.37E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0080s + [COUNTERS] PROGRAM TOTAL : 0.8511s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8471s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.57E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and hip (47.138611968034176) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.138611968034162) and cuda (47.138611968034176) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,9 +559,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -415,57 +569,59 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268178] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.5269s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5020s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0177s for 81920 events => throughput is 4.64E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0072s + [COUNTERS] PROGRAM TOTAL : 1.9977s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9877s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 81920 events => throughput is 8.83E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (47.144596232268157) and hip (47.144596232268178) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (47.144596232268185) and cuda (47.144596232268178) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.391294e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.921444e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.401261e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.230318e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.774650e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.714613e+07 ) sec^-1 *** 
EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.525092e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.316499e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.778220e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.728492e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.225843e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.598150e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.784062e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.745533e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.866082e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.694862e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 01d41bcb36..0850891597 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-18_21:10:19 +DATE: 2024-09-18_13:41:56 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/vala [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.6353s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6036s - [COUNTERS] Fortran MEs ( 1 ) : 0.0317s for 8192 events => throughput is 2.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8368s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7947s + [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.94E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/vala [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3566s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3248s - [COUNTERS] Fortran MEs ( 1 ) : 0.0318s for 8192 events => throughput is 2.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4509s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4076s + [COUNTERS] Fortran MEs ( 1 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268157] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.5566s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2403s - [COUNTERS] Fortran MEs ( 1 ) : 0.3163s for 81920 events => throughput is 2.59E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9677s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5472s + [COUNTERS] Fortran MEs ( 1 ) : 0.4205s for 81920 events => throughput is 1.95E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138605296829816] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138606099989779] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3355s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3068s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4454s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4032s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138605296829816) differ by less than 4E-4 (1.4152313931869998e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138606099989779) differ by less than 4E-4 (1.2448487851646206e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144592003933589] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144592707001024] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.4310s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1286s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3022s for 81920 events => throughput is 2.71E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.9812s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5583s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4226s for 81920 events => throughput is 1.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144592003933589) differ by less than 4E-4 (8.968863673963767e-08) +OK! xsec from fortran (47.144596232268185) and cpp (47.144592707001024) differ by less than 4E-4 (7.477563590541081e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.847123e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.959611e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.968057e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.959548e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138602746994408] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138602111070696] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3350s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3201s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0148s for 8192 events => throughput is 5.53E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4226s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4048s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0175s for 8192 events => throughput is 4.69E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138602746994408) differ by less than 4E-4 (1.956154279669775e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138602111070696) differ by less than 4E-4 (2.091059336795098e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144589414828133] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144588828412729] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.2639s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1226s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1411s for 81920 events => throughput is 5.81E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.7203s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5477s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1724s for 81920 events => throughput is 4.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144589414828133) differ by less than 4E-4 (1.44607029572974e-07) +OK! xsec from fortran (47.144596232268185) and cpp (47.144588828412729) differ by less than 4E-4 (1.570456860111591e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.663521e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.738872e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.691631e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.748017e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138602995819163] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3277s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3201s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4166s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 8192 events => throughput is 9.16E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138602995819163) differ by less than 4E-4 (1.9033685183522664e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138602499179925) differ by less than 4E-4 (2.008725722424387e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144587555291501] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.1791s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1108s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0682s for 81920 events => throughput is 1.20E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.6374s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5454s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0918s for 81920 events => throughput is 8.93E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144587555291501) differ by less than 4E-4 (1.840502910077646e-07) +OK! xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ by less than 4E-4 (1.9590636879396328e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.176918e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.120680e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.193326e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4176s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4084s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 8192 events => throughput is 9.23E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138602499179925) differ by less than 4E-4 (2.008725722424387e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.6345s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5478s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0864s for 81920 events => throughput is 9.48E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ by less than 4E-4 (1.9590636879396328e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.788116e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.189981e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.789950e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138606840950104] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4189s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4068s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0118s for 8192 events => throughput is 6.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138606840950104) differ by less than 4E-4 (1.0876612277499476e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144591429357156] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.6732s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5506s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1222s for 81920 events => throughput is 6.70E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (47.144596232268185) and cpp (47.144591429357156) differ by less than 4E-4 (1.0187617272006122e-07) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.765000e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.898629e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138605197694872] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138612402172164] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.6623s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6495s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0053s for 8192 events => throughput is 1.55E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0075s + [COUNTERS] PROGRAM TOTAL : 0.8533s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8496s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and hip (47.138605197694872) differ by less than 4E-4 (1.4362619105146024e-07) +OK! xsec from fortran (47.138611968034162) and cuda (47.138612402172164) differ by less than 4E-4 (9.209817353195149e-09) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144590142508306] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596666727985] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.3935s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3777s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 81920 events => throughput is 9.88E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0076s + [COUNTERS] PROGRAM TOTAL : 1.9917s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9825s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 81920 events => throughput is 9.59E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and hip (47.144590142508306) differ by less than 4E-4 (1.2917195901795964e-07) +OK! xsec from fortran (47.144596232268185) and cuda (47.144596666727985) differ by less than 4E-4 (9.215473939505614e-09) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.708770e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.139565e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.800159e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.535095e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.141802e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.504949e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.784579e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.302031e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.174041e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.479596e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.634532e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.319419e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.761258e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.230042e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.329932e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.720607e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 462c7a33d0..1cd7f5e3d4 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 
BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-18_21:10:37 +DATE: 2024-09-18_13:42:23 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/vala [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.6487s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6170s - [COUNTERS] Fortran MEs ( 1 ) : 0.0317s for 8192 events => throughput is 2.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8437s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8020s + [COUNTERS] Fortran MEs ( 1 ) : 0.0417s for 8192 events => throughput is 1.96E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/vala [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3602s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3285s - [COUNTERS] Fortran MEs ( 1 ) : 0.0317s for 8192 events => throughput is 2.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4457s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4043s + [COUNTERS] Fortran MEs ( 1 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268157] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.4921s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1747s - [COUNTERS] Fortran MEs ( 1 ) : 0.3175s for 81920 events => throughput is 2.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9802s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5584s + [COUNTERS] Fortran MEs ( 1 ) : 0.4219s for 81920 events => throughput is 1.94E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138613306947953] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138613306947967] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3635s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3276s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0357s for 8192 events => throughput is 2.29E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4569s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4109s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0456s for 8192 events => throughput is 1.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138613306947953) differ by less than 2E-4 (2.8403759344541868e-08) +OK! xsec from fortran (47.138611968034162) and cpp (47.138613306947967) differ by less than 2E-4 (2.8403759566586473e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,9 +159,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -169,28 +169,28 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597573367548] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.5363s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1798s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3563s for 81920 events => throughput is 2.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.0058s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5525s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4529s for 81920 events => throughput is 1.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144597573367548) differ by less than 2E-4 (2.8446513367086368e-08) +OK! xsec from fortran (47.144596232268185) and cpp (47.144597573367548) differ by less than 2E-4 (2.8446512922997158e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.352222e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.833075e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.358203e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.846422e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613306947953] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3490s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3279s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0209s for 8192 events => throughput is 3.91E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4330s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144597573367527] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144597573367555] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.3859s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1768s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2089s for 81920 events => throughput is 3.92E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8037s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5546s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2487s for 81920 events => throughput is 3.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144597573367527) differ by less than 2E-4 (2.8446512922997158e-08) +OK! xsec from fortran (47.144596232268185) and cpp (47.144597573367555) differ by less than 2E-4 (2.8446512922997158e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.933966e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.318121e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.951899e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.355189e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138613336664328] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3792s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3670s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0121s for 8192 events => throughput is 6.80E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4204s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4041s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0159s for 8192 events => throughput is 5.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138613336664328) differ by less than 2E-4 (2.9034163517849265e-08) +OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144597613828985] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.2982s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1786s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1195s for 81920 events => throughput is 6.86E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7037s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5502s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1532s for 81920 events => throughput is 5.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144597613828985) differ by less than 2E-4 (2.9304754622927476e-08) +OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.054164e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.280868e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.319511e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4169s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4022s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.6742s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5312s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1426s for 81920 events => throughput is 5.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.827488e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.114502e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.962674e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4277s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4052s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.7709s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5477s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2228s for 81920 events => throughput is 3.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.662408e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.633047e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611963547795] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138611963547788] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.6425s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6292s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 8192 events => throughput is 1.32E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0072s + [COUNTERS] PROGRAM TOTAL : 0.8496s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8458s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and hip (47.138611963547795) differ by less than 2E-4 (9.517397980829401e-11) +OK! xsec from fortran (47.138611968034162) and cuda (47.138611963547788) differ by less than 2E-4 (9.517409083059647e-11) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232269080] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232269095] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.5279s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5024s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0177s for 81920 events => throughput is 4.63E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0078s + [COUNTERS] PROGRAM TOTAL : 1.9898s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 8.98E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and hip (47.144596232269080) differ by less than 2E-4 (1.9539925233402755e-14) +OK! xsec from fortran (47.144596232268185) and cuda (47.144596232269095) differ by less than 2E-4 (1.9317880628477724e-14) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.389948e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.961867e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.430104e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.402195e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.792157e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.751023e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.485305e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.487612e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.790389e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.767038e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.203783e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.725223e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.775819e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.748403e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.875599e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.694986e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index c80769695f..652edcf84f 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-18_21:10:56 +DATE: 2024-09-18_13:42:52 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.6228s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3900s - [COUNTERS] Fortran MEs ( 1 ) : 0.2327s for 8192 events => throughput is 3.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7493s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4200s + [COUNTERS] Fortran MEs ( 1 ) : 0.3293s for 8192 events => throughput is 2.49E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5388s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3055s - [COUNTERS] Fortran MEs ( 1 ) : 0.2333s for 8192 events => throughput is 3.51E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7150s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3834s + [COUNTERS] Fortran MEs ( 1 ) : 0.3316s for 8192 events => throughput is 2.47E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606449E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.4842s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3122s - [COUNTERS] Fortran MEs ( 1 ) : 2.1720s for 81920 events => throughput is 3.77E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1953s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8899s + [COUNTERS] Fortran MEs ( 1 ) : 3.3054s for 81920 events => throughput is 2.48E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720248E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5432s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2880s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2546s for 8192 events => throughput is 3.22E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.7369s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3879s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3477s for 8192 events => throughput is 2.36E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474251492720248E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748553E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606491E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 4.2062s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3840s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8215s for 81920 events => throughput is 2.90E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 5.3596s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8952s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4633s for 81920 events => throughput is 2.37E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558171606491E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.988318e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.471888e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.012656e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.456119e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720248E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4562s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3138s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1419s for 8192 events => throughput is 5.77E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.5680s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3869s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1804s for 8192 events => throughput is 4.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474251492720248E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748567E-002) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606491E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971656827279650E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.8213s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4027s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.4181s for 81920 events => throughput is 5.78E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 3.6931s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8866s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8056s for 81920 events => throughput is 4.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558171606491E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279650E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.842744e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.653702e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.866708e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.694721e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3845s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3131s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0711s for 8192 events => throughput is 1.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4803s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3892s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0905s for 8192 events => throughput is 9.05E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474251492720207E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748595E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,120 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606505E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.1081s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3980s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7097s for 81920 events => throughput is 1.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.8119s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9071s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9041s for 81920 events => throughput is 9.06E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558171606505E-002) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.204129e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.368531e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.198782e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.254727e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4668s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3854s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0809s for 8192 events => throughput is 1.01E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748595E-002) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.6912s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8823s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8083s for 81920 events => throughput is 1.01E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.047750e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.045236e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +444,110 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! 
Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720248E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471485809748581E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7001s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6719s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.90E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0143s + [COUNTERS] PROGRAM TOTAL : 0.5013s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3866s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1140s for 8192 events => throughput is 7.19E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and hip (7.8474251492720248E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! 
xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748581E-002) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 3.0360s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8971s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1383s for 81920 events => throughput is 7.20E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.160157e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.320569e+04 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.8352s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8227s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 8192 events => throughput is 8.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485809748553E-002) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606491E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971656827279636E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.8408s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7337s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0932s for 81920 events => throughput is 8.79E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0139s + [COUNTERS] PROGRAM TOTAL : 2.3543s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3252s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0261s for 81920 events => throughput is 3.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and hip (7.9971558171606491E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656827279636E-002) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.013717e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.134986e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.115536e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.475726e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.642640e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.339604e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.831605e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.161734e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.514831e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.354476e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.707768e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.170951e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.631145e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.318892e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.701309e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.662470e+06 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index d7948fe70d..3362abfbc9 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-18_21:11:27 +DATE: 2024-09-18_13:43:35 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.5642s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3317s - [COUNTERS] Fortran MEs ( 1 ) : 0.2324s for 8192 events => throughput is 3.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7388s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4086s + [COUNTERS] Fortran MEs ( 1 ) : 0.3301s for 8192 events => throughput is 2.48E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5418s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3092s - [COUNTERS] Fortran MEs ( 1 ) : 0.2327s for 8192 events => throughput is 3.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7145s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3839s + [COUNTERS] Fortran MEs ( 1 ) : 0.3307s for 8192 events => throughput is 2.48E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606449E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.7189s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4075s - [COUNTERS] Fortran MEs ( 1 ) : 2.3113s for 81920 events => throughput is 3.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1779s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8749s + [COUNTERS] Fortran MEs ( 1 ) : 3.3030s for 81920 events => throughput is 2.48E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474238393007253E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471473453718410E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5702s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3124s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2572s for 8192 events => throughput is 3.18E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.7223s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3896s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3317s for 8192 events => throughput is 2.47E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474238393007253E-002) differ by less than 4E-4 (1.6693007842683016e-07) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471473453718410E-002) differ by less than 4E-4 (1.574588530672827e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971543373778375E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971643267110940E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.9659s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4061s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.5591s for 81920 events => throughput is 3.20E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 5.2162s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8886s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.3267s for 81920 events => throughput is 2.46E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971543373778375E-002) differ by less than 4E-4 (1.8503863641328167e-07) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971643267110940E-002) differ by less than 4E-4 (1.69562182517069e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.306143e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.539712e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.307654e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.530008e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474229018345096E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471459294758378E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3844s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3052s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0788s for 8192 events => throughput is 1.04E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4889s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1015s for 8192 events => throughput is 8.07E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474229018345096E-002) differ by less than 4E-4 (2.8639171045785616e-07) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459294758378E-002) differ by less than 4E-4 (3.37893311330717e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971534528332888E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971629726281482E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.2208s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4149s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8056s for 81920 events => throughput is 1.02E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.9019s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8838s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0176s for 81920 events => throughput is 8.05E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971534528332888E-002) differ by less than 4E-4 (2.9564602843645815e-07) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629726281482E-002) differ by less than 4E-4 (3.38882539141494e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.039215e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.124328e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.034006e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.199350e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474228627553363E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3507s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3128s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0377s for 8192 events => throughput is 2.17E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4313s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3842s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0467s for 8192 events => throughput is 1.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474228627553363E-002) differ by less than 4E-4 (2.9137158252812156e-07) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459718665412E-002) differ by less than 4E-4 (3.324912595248364e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971533958864222E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.7725s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4031s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3692s for 81920 events => throughput is 2.22E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.3507s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8840s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4663s for 81920 events => throughput is 1.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971533958864222E-002) differ by less than 4E-4 (3.027669184252346e-07) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002) differ by less than 4E-4 (3.447153443802975e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.411160e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.796536e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.786454e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4292s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3860s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459718665412E-002) differ by less than 4E-4 (3.324912595248364e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.3290s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9023s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4263s for 81920 events => throughput is 1.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002) differ by less than 4E-4 (3.447153443802975e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.981352e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.427122e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.997761e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471471932611128E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4486s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3925s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0556s for 8192 events => throughput is 1.47E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471471932611128E-002) differ by less than 4E-4 (1.768430569759616e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971639934306102E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.4713s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9066s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5643s for 81920 events => throughput is 1.45E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971639934306102E-002) differ by less than 4E-4 (2.1123700788550082e-07) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.458815e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.487461e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474239700037612E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471475012321185E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.6449s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6236s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0134s + [COUNTERS] PROGRAM TOTAL : 0.8337s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8291s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.31E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and hip (7.8474239700037612E-002) differ by less than 4E-4 (1.5027454702831733e-07) +OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471475012321185E-002) differ by less than 4E-4 (1.375968260441951e-07) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971544830799671E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971648932322295E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.7547s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7041s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0368s for 81920 events => throughput is 2.22E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0137s + [COUNTERS] PROGRAM TOTAL : 2.3421s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3278s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 81920 events => throughput is 6.21E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and hip (7.9971544830799671E-002) differ by less than 4E-4 (1.6681939285501102e-07) +OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971648932322295E-002) differ by less than 4E-4 (9.872194262072753e-08) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.086951e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.709678e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.086184e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.936833e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.463630e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.247414e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.705248e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.199841e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.471416e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.195768e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.726540e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.278448e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.330755e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.108387e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.027556e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.230857e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index bc0a9b927a..4de53c2d38 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' 
make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-18_21:11:56 +DATE: 2024-09-18_13:44:15 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.5586s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3257s - [COUNTERS] Fortran MEs ( 1 ) : 0.2329s for 8192 events => throughput is 3.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7391s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4081s + [COUNTERS] Fortran MEs ( 1 ) : 0.3310s for 8192 events => throughput is 2.48E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5383s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3079s - [COUNTERS] Fortran MEs ( 1 ) : 0.2303s for 8192 events => throughput is 3.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3840s + [COUNTERS] Fortran MEs ( 1 ) : 0.3281s for 8192 events => throughput is 2.50E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606449E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.7124s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3979s - [COUNTERS] Fortran MEs ( 1 ) : 2.3145s for 81920 events => throughput is 3.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1945s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8920s + [COUNTERS] Fortran MEs ( 1 ) : 3.3025s for 81920 events => throughput is 2.48E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474252272193679E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471486590207584E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5952s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3092s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2852s for 8192 events => throughput is 2.87E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.7363s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3850s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3501s for 8192 events => throughput is 2.34E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474252272193679E-002) differ by less than 2E-4 (9.93285631523122e-09) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486590207584E-002) differ by less than 2E-4 (9.945765766516956e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558933520065E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971657589635384E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 4.2315s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4023s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8285s for 81920 events => throughput is 2.90E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 5.4502s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8950s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5541s for 81920 events => throughput is 2.30E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558933520065E-002) differ by less than 2E-4 (9.527307387457995e-09) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589635384E-002) differ by less than 2E-4 (9.532824529756567e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.939184e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.405336e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.931342e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.411690e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474252220105081E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471486540430027E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4546s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3105s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1437s for 8192 events => throughput is 5.70E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.5672s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1790s for 8192 events => throughput is 4.58E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474252220105081E-002) differ by less than 2E-4 (9.269089717989232e-09) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486540430027E-002) differ by less than 2E-4 (9.311426296676473e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558934000736E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971657589963913E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.8365s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4064s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.4297s for 81920 events => throughput is 5.73E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 3.6943s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8846s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8089s for 81920 events => throughput is 4.53E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558934000736E-002) differ by less than 2E-4 (9.53331791286871e-09) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589963913E-002) differ by less than 2E-4 (9.536932576992285e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.769522e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.686401e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.769710e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.704142e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474252077403842E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3764s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3056s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0705s for 8192 events => throughput is 1.16E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4765s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3867s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0892s for 8192 events => throughput is 9.18E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474252077403842E-002) differ by less than 2E-4 (7.450642991457812e-09) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486395956899E-002) differ by less than 2E-4 (7.470335683379403e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,120 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558777659491E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.0941s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3918s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7020s for 81920 events => throughput is 1.17E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.7937s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8977s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8954s for 81920 events => throughput is 9.15E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558777659491E-002) differ by less than 2E-4 (7.578357275050962e-09) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002) differ by less than 2E-4 (7.571829385710771e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.199948e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.255440e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.306810e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.362786e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4681s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3893s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0782s for 8192 events => throughput is 1.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486395956899E-002) differ by less than 2E-4 (7.470335683379403e-09) -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.6895s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8933s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7956s for 81920 events => throughput is 1.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002) differ by less than 2E-4 (7.571829385710771e-09) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.053887e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.065947e+05 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +444,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251477062731E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471486537749241E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.6386s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6104s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.87E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0142s + [COUNTERS] PROGRAM TOTAL 
: 0.5056s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1179s for 8192 events => throughput is 6.95E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and hip (7.8474251477062731E-002) differ by less than 2E-4 (1.9952373087051e-10) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486537749241E-002) differ by less than 2E-4 (9.277263846030337e-09) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +479,149 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558174786780E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971657565670345E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.8413s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7334s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0936s for 81920 events => throughput is 8.76E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0144s + [COUNTERS] PROGRAM TOTAL : 3.0551s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8834s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1711s for 81920 events => throughput is 7.00E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657565670345E-002) differ by less than 2E-4 (9.233155351395794e-09) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.918098e+04 ) sec^-1 -OK! 
xsec from fortran (7.9971558171606449E-002) and hip (7.9971558174786780E-002) differ by less than 2E-4 (3.976818874207311e-11) +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.013587e+04 ) sec^-1 -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471485791426987E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.8394s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8269s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 8192 events => throughput is 8.67E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s -OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485791426987E-002) differ by less than 2E-4 (2.334807902570901e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971656830583548E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.3560s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3268s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0262s for 81920 events => throughput is 3.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656830583548E-002) differ by less than 2E-4 (4.131384123695625e-11) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.091802e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.114701e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.071153e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.463889e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.640320e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.291446e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.832709e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 
11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.155947e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.638301e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.267147e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.710488e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.165743e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.630540e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.239896e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.702853e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 
3.648131e+06 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 6b9f2afff2..da4192a0d3 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-18_21:12:28 +DATE: 2024-09-18_13:44:59 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 3.1672s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3317s - [COUNTERS] Fortran MEs ( 1 ) : 2.8355s for 8192 events => throughput is 2.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6910s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3764s + [COUNTERS] Fortran MEs ( 1 ) : 4.3146s for 8192 events => throughput is 1.90E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.0939s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2445s - [COUNTERS] Fortran MEs ( 1 ) : 2.8494s for 8192 events => throughput is 2.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5801s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2959s + [COUNTERS] Fortran MEs ( 1 ) : 4.2843s for 8192 events => throughput is 1.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283627] fbridge_mode=0 + [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 30.0793s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5366s - [COUNTERS] Fortran MEs ( 1 ) : 28.5427s for 81920 events => throughput is 2.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 45.0141s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0696s + [COUNTERS] Fortran MEs ( 1 ) : 42.9445s for 81920 events => throughput is 1.91E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926843] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.8844s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2797s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5973s for 8192 events => throughput is 2.28E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0074s + [COUNTERS] PROGRAM TOTAL : 4.7546s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3011s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4446s for 8192 events => throughput is 1.84E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0090s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849706926843) differ by less than 3E-14 (8.881784197001252e-16) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283632] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 37.7445s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5656s - [COUNTERS] CudaCpp MEs ( 2 ) : 36.1719s for 81920 events => throughput is 2.26E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0070s + [COUNTERS] PROGRAM TOTAL : 46.6278s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0620s + [COUNTERS] CudaCpp MEs ( 2 ) : 44.5568s for 81920 events => throughput is 1.84E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930270975283632) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.374831e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.897024e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.230830e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.894466e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926832] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 2.0045s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2685s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.7319s for 8192 events => throughput is 4.73E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0041s + [COUNTERS] PROGRAM TOTAL : 2.6753s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2991s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3715s for 8192 events => throughput is 3.45E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0047s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849706926832) differ by less than 3E-14 (1.2212453270876722e-15) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283630] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930257969248325] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 18.9916s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5393s - [COUNTERS] CudaCpp MEs ( 2 ) : 17.4487s for 81920 events => throughput is 4.69E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0036s + [COUNTERS] PROGRAM TOTAL : 25.8378s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0749s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.7582s for 81920 events => throughput is 3.45E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930270975283630) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248325) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.829240e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.534769e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.836399e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.572800e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926854] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.0244s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2586s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7634s for 8192 events => throughput is 1.07E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3401s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2956s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0420s for 8192 events => throughput is 7.86E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849706926854) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283624] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 9.5121s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5355s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.9749s for 81920 events => throughput is 1.03E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [COUNTERS] PROGRAM TOTAL : 12.4720s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0723s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.3972s for 81920 events => throughput is 7.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930270975283624) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.094613e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.115646e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.135568e+03 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.2200s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2987s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9191s for 8192 events => throughput is 8.91E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 11.2276s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0702s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.1552s for 81920 events => throughput is 8.95E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.261139e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.101634e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.284539e+03 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.4788s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2959s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1801s for 8192 events => throughput is 6.94E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 13.8631s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0851s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.7752s for 81920 events => throughput is 6.96E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.044735e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.087685e+03 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926843] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.8926s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6814s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1119s for 8192 events => throughput is 7.32E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0993s + [COUNTERS] PROGRAM TOTAL : 0.8126s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7388s 
+ [COUNTERS] CudaCpp MEs ( 2 ) : 0.0390s for 8192 events => throughput is 2.10E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and hip (0.33144849706926843) differ by less than 3E-14 (8.881784197001252e-16) +OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283644] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930257969248336] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 3.0388s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8723s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0668s for 81920 events => throughput is 7.68E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0997s + [COUNTERS] PROGRAM TOTAL : 2.9095s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5487s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3260s for 81920 events => throughput is 2.51E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and hip (0.20930270975283644) differ by less than 3E-14 (8.881784197001252e-16) +OK! xsec from fortran (0.20930257969248323) and cuda (0.20930257969248336) differ by less than 3E-14 (6.661338147750939e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.452923e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.150288e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.410641e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.340464e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.693558e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.120076e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.121248e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.169270e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 
17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.696136e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.124208e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.679912e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.169177e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.692445e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.120876e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.546236e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.432039e+05 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 096b3f824b..d51442efc8 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-18_21:15:41 +DATE: 2024-09-18_13:48:49 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 3.0802s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2419s - [COUNTERS] Fortran MEs ( 1 ) : 2.8384s for 8192 events => throughput is 2.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5851s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2964s + [COUNTERS] Fortran MEs ( 1 ) : 4.2887s for 8192 events => throughput is 1.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.0676s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2470s - [COUNTERS] Fortran MEs ( 1 ) : 2.8206s for 8192 events => throughput is 2.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5902s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2920s + [COUNTERS] Fortran MEs ( 1 ) : 4.2983s for 8192 events => throughput is 1.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283627] fbridge_mode=0 + [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 30.1218s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5491s - [COUNTERS] Fortran MEs ( 1 ) : 28.5727s for 81920 events => throughput is 2.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 45.0593s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0904s + [COUNTERS] Fortran MEs ( 1 ) : 42.9689s for 81920 events => throughput is 1.91E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3315 [0.33145004642682091] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144941544531159] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.7398s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2582s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4741s for 8192 events => throughput is 2.36E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0074s + [COUNTERS] PROGRAM TOTAL : 4.6331s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2951s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3295s for 8192 events => throughput is 1.89E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33145004642682091) differ by less than 4E-4 (4.6745046844431926e-06) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144941544531159) differ by less than 4E-4 (4.675947774535061e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,39 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930342252742398] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930329135137288] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 36.6310s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6637s - [COUNTERS] CudaCpp MEs ( 2 ) : 34.9569s for 81920 events => throughput is 2.34E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0105s + [COUNTERS] PROGRAM TOTAL : 45.4961s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0670s + [COUNTERS] CudaCpp MEs ( 2 ) : 43.4203s for 81920 events => throughput is 1.89E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0087s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930342252742398) differ by less than 4E-4 (3.405472335016313e-06) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930329135137288) differ by less than 4E-4 (3.400143900211816e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.440002e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.953905e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.451889e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.953638e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +205,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144996928807552] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144937378275385] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.1311s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2575s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8713s for 8192 events => throughput is 9.40E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s + [COUNTERS] PROGRAM TOTAL : 1.5948s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2947s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2975s for 8192 events => throughput is 6.31E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144996928807552) differ by less than 4E-4 (4.441772461838411e-06) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144937378275385) differ by less than 4E-4 (4.550249099066761e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +240,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930338466143997] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930324959819654] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 10.2003s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5319s - [COUNTERS] CudaCpp MEs ( 2 ) : 8.6665s for 81920 events => throughput is 9.45E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [COUNTERS] PROGRAM TOTAL : 14.0680s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0718s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9937s for 81920 events => throughput is 6.83E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930338466143997) differ by less than 4E-4 (3.2245574101974483e-06) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930324959819654) differ by less than 4E-4 (3.2006567445286294e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.681677e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.014424e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.754711e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.021993e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +285,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3315 [0.33145003508801812] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.6510s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2558s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3938s for 8192 events => throughput is 2.08E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8304s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3000s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5290s for 8192 events => throughput is 1.55E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33145003508801812) differ by less than 4E-4 (4.6402948361556895e-06) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144939353225550) differ by less than 4E-4 (4.609834643787281e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +320,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930341333868943] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 5.6819s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5913s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.0897s for 81920 events => throughput is 2.00E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 7.2994s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0696s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.2285s for 81920 events => throughput is 1.57E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930341333868943) differ by less than 4E-4 (3.361570683813042e-06) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ by less than 4E-4 (3.3244755468508913e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.179139e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.599753e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.600977e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 0.7627s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2949s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4666s for 8192 events => throughput is 1.76E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144939353225550) differ by less than 4E-4 (4.609834643787281e-06) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 6.8208s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0681s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.7514s for 81920 events => throughput is 1.72E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ by less than 4E-4 (3.3244755468508913e-06) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.808420e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.157595e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.816362e+04 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144947551388249] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 0.8859s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2954s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5889s for 8192 events => throughput is 1.39E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
xsec from fortran (0.33144786561240197) and cpp (0.33144947551388249) differ by less than 4E-4 (4.857178601991308e-06) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930331717025510] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 7.9475s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0831s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8629s for 81920 events => throughput is 1.40E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.20930257969248323) and cpp (0.20930331717025510) differ by less than 4E-4 (3.523500632152121e-06) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.420878e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.411954e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +525,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3315 [0.33145003213125773] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144955535316123] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.9162s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7390s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0728s for 8192 events => throughput is 1.13E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1044s + [COUNTERS] PROGRAM TOTAL : 0.7891s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7375s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 8192 events => throughput is 3.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and hip (0.33145003213125773) differ by less than 4E-4 (4.631374112662456e-06) +OK! xsec from fortran (0.33144786561240197) and cuda (0.33144955535316123) differ by less than 4E-4 (5.0980589545446264e-06) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +560,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930346912077236] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930336562619947] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 2.6538s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8663s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6818s for 81920 events => throughput is 1.20E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1057s + [COUNTERS] PROGRAM TOTAL : 2.8062s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5479s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2338s for 81920 events => throughput is 3.50E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and hip (0.20930346912077236) differ by less than 4E-4 (3.6280845909786308e-06) +OK! 
xsec from fortran (0.20930257969248323) and cuda (0.20930336562619947) differ by less than 4E-4 (3.755012085271403e-06) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.150702e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.088372e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.789281e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.376508e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.112095e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.114154e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.070936e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.259362e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.122768e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.087087e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.698596e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.241028e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.056109e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.079549e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.797688e+04 ) 
sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.391392e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 5d6c5fa5e3..4029a4bd08 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' 
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-18_21:18:52 +DATE: 2024-09-18_13:51:53 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 3.2993s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3148s - [COUNTERS] Fortran MEs ( 1 ) : 2.9845s for 8192 events => throughput is 2.74E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5765s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2949s + [COUNTERS] Fortran MEs ( 1 ) : 4.2815s for 8192 events => throughput is 1.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.0889s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2453s - [COUNTERS] Fortran MEs ( 1 ) : 2.8435s for 8192 events => throughput is 2.88E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6114s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2967s + [COUNTERS] Fortran MEs ( 1 ) : 4.3148s for 8192 events => throughput is 1.90E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283627] fbridge_mode=0 + [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 30.1871s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5311s - [COUNTERS] Fortran MEs ( 1 ) : 28.6560s for 81920 events => throughput is 2.86E+03 events/s + [COUNTERS] PROGRAM TOTAL : 45.1244s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0863s + [COUNTERS] Fortran MEs ( 1 ) : 43.0382s for 81920 events => throughput is 1.90E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849880304822] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786734542164] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.8463s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2466s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5926s for 8192 events => throughput is 2.28E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0071s + [COUNTERS] PROGRAM TOTAL : 4.8194s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2986s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.5116s for 8192 events => throughput is 1.82E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0093s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849880304822) differ by less than 2E-4 (5.230916810816666e-09) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786734542164) differ by less than 2E-4 (5.228634192278037e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930271054111049] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930258048084049] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 37.5708s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5608s - [COUNTERS] CudaCpp MEs ( 2 ) : 36.0030s for 81920 events => throughput is 2.28E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0071s + [COUNTERS] PROGRAM TOTAL : 47.3150s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0784s + [COUNTERS] CudaCpp MEs ( 2 ) : 45.2275s for 81920 events => throughput is 1.81E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0091s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930271054111049) differ by less than 2E-4 (3.766192246956734e-09) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258048084049) differ by less than 2E-4 (3.766591261111785e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.356933e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.873631e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.377720e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.874665e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849797290254] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786651655289] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.9860s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2638s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.7191s for 8192 events => throughput is 4.77E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s + [COUNTERS] PROGRAM TOTAL : 2.6800s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2994s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3756s for 8192 events => throughput is 3.45E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849797290254) differ by less than 2E-4 (2.7263173940639263e-09) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786651655289) differ by less than 2E-4 (2.7278828085286477e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930271025983213] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930258019984904] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 18.9773s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5499s - [COUNTERS] CudaCpp MEs ( 2 ) : 17.4239s for 81920 events => throughput is 4.70E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0035s + [COUNTERS] PROGRAM TOTAL : 25.8989s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0743s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.8193s for 81920 events => throughput is 3.44E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0054s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930271025983213) differ by less than 2E-4 (2.4223090200337083e-09) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019984904) differ by less than 2E-4 (2.424078271445751e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.947460e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.519279e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.987211e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.530635e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849773665513] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.0155s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2483s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7655s for 8192 events => throughput is 1.07E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + [COUNTERS] PROGRAM TOTAL : 1.3351s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2976s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0351s for 8192 events => throughput is 7.91E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849773665513) differ by less than 2E-4 (2.013544886381169e-09) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930271025898603] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 9.4472s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5509s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.8947s for 81920 events => throughput is 1.04E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + [COUNTERS] PROGRAM TOTAL : 12.4644s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0722s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.3898s for 81920 events => throughput is 7.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930271025898603) differ by less than 2E-4 (2.418266698001048e-09) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.080026e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.009062e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.120880e+03 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.1951s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2962s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8968s for 8192 events => throughput is 9.13E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 11.2498s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0961s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.1516s for 81920 events => throughput is 8.95E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.318473e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.101222e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.350512e+03 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.4981s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3004s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1949s for 8192 events => throughput is 6.86E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 13.9738s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0733s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8977s for 81920 events => throughput is 6.89E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.010058e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.974396e+03 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849679653593] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786533876569] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.7773s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5660s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1119s for 8192 events => throughput is 7.32E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0994s + [COUNTERS] PROGRAM TOTAL : 0.8114s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7381s 
+ [COUNTERS] CudaCpp MEs ( 2 ) : 0.0386s for 8192 events => throughput is 2.12E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and hip (0.33144849679653593) differ by less than 2E-4 (8.228511205743416e-10) +OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786533876569) differ by less than 2E-4 (8.255786054789382e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930271009954451] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930258003933860] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 3.0159s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8455s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0702s for 81920 events => throughput is 7.65E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1001s + [COUNTERS] PROGRAM TOTAL : 2.9001s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5282s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3371s for 81920 events => throughput is 2.43E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and hip (0.20930271009954451) differ by less than 2E-4 (1.6564918325912004e-09) +OK! xsec from fortran (0.20930257969248323) and cuda (0.20930258003933860) differ by less than 2E-4 (1.6571959360334176e-09) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.427960e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.156591e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.410829e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.143626e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.681706e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.122372e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.117500e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.161172e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 
17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.677894e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.154782e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.670389e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.164268e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.680352e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.117598e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.532654e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.420328e+05 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index e4dcd810b0..b5fe53dcd6 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,21 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg + + +make USEBUILDDIR=1 BACKEND=cuda + + +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-09-18_21:22:43 +DATE: 2024-09-18_13:57:21 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -29,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 62.7332s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4667s - [COUNTERS] Fortran MEs ( 1 ) : 62.2665s for 8192 events => throughput is 1.32E+02 events/s + [COUNTERS] PROGRAM TOTAL : 100.9942s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5376s + [COUNTERS] Fortran MEs ( 1 ) : 100.4566s for 8192 events => throughput is 8.15E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -54,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 62.6313s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3935s - [COUNTERS] Fortran MEs ( 1 ) : 62.2378s for 8192 events => throughput is 1.32E+02 events/s + [COUNTERS] PROGRAM TOTAL : 100.8250s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5356s + [COUNTERS] Fortran MEs ( 1 ) : 100.2893s for 8192 events => throughput is 8.17E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -79,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858527333038E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 625.5755s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1317s - [COUNTERS] Fortran MEs ( 1 ) : 622.4438s for 81920 events => throughput is 1.32E+02 events/s + [COUNTERS] PROGRAM TOTAL : 1009.3485s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5474s + [COUNTERS] Fortran MEs ( 1 ) : 1004.8011s for 81920 events => throughput is 8.15E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -104,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729949E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 99.4442s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4474s - [COUNTERS] CudaCpp MEs ( 2 ) : 98.8367s for 8192 events => throughput is 8.29E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1601s + [COUNTERS] PROGRAM TOTAL : 122.6272s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5269s + [COUNTERS] CudaCpp MEs ( 2 ) : 121.8976s for 8192 events => throughput is 6.72E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2027s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019835729949E-007) differ by less than 3E-14 (3.552713678800501e-15) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282475E-007) differ by less than 3E-14 (2.4424906541753444e-15) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -139,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858527333072E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633775E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 986.3468s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0893s - [COUNTERS] CudaCpp MEs ( 2 ) : 983.1056s for 81920 events => throughput is 8.33E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1519s + [COUNTERS] PROGRAM TOTAL : 1215.7257s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3426s + [COUNTERS] CudaCpp MEs ( 2 ) : 1211.1771s for 81920 events => throughput is 6.76E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2060s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858527333072E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633775E-007) differ by less than 3E-14 (1.5543122344752192e-15) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.044799e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.947835e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.046587e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.953028e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -184,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729943E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 49.4765s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4488s - [COUNTERS] CudaCpp MEs ( 2 ) : 48.8907s for 8192 events => throughput is 1.68E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1370s + [COUNTERS] PROGRAM TOTAL : 65.0412s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5159s + [COUNTERS] CudaCpp MEs ( 2 ) : 64.4246s for 8192 events => throughput is 1.27E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1007s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019835729943E-007) differ by less than 3E-14 (3.3306690738754696e-15) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -219,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858527333069E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 494.5399s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0807s - [COUNTERS] CudaCpp MEs ( 2 ) : 491.3813s for 81920 events => throughput is 1.67E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0779s + [COUNTERS] PROGRAM TOTAL : 643.1942s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3363s + [COUNTERS] CudaCpp MEs ( 2 ) : 638.7557s for 81920 events => throughput is 1.28E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1021s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858527333069E-007) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.084386e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.582676e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.067891e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.328359e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -264,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729933E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 22.9679s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4156s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.4381s for 8192 events => throughput is 3.65E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1142s + [COUNTERS] PROGRAM TOTAL : 28.5049s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5111s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.9480s for 8192 events => throughput is 2.93E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0458s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019835729933E-007) differ by less than 3E-14 (2.886579864025407e-15) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -299,45 +319,309 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858527333072E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 226.5648s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1182s - [COUNTERS] CudaCpp MEs ( 2 ) : 223.4112s for 81920 events => throughput is 3.67E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0354s + [COUNTERS] PROGRAM TOTAL : 284.0441s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3313s + [COUNTERS] CudaCpp MEs ( 2 ) : 279.6676s for 81920 events => throughput is 2.93E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0452s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858527333072E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.599435e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.544656e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.429932e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 25.4242s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5110s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.8732s for 8192 events => throughput is 3.29E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0401s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 258.4274s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3572s + [COUNTERS] CudaCpp MEs ( 2 ) : 254.0306s for 81920 events => throughput is 3.22E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0397s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.924332e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.943882e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 26.1506s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5251s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.5777s for 8192 events => throughput is 3.20E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0477s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 258.8965s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4685s + [COUNTERS] CudaCpp MEs ( 2 ) : 254.3821s for 81920 events => throughput is 3.22E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0458s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.459682e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.538954e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.450518e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 3.2103s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0334s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1011s for 8192 events => throughput is 7.44E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0758s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561551282475E-007) differ by less than 3E-14 (2.4424906541753444e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633791E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 16.9165s + [COUNTERS] Fortran Overhead ( 0 ) : 4.9531s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.8819s for 81920 events => throughput is 7.53E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0816s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713115633791E-007) differ by less than 3E-14 (2.220446049250313e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.508582e+03 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.240924e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.270483e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.589702e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.287293e+03 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.424280e+03 ) sec^-1 -*** (2-512z) WARNING! 
SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.273685e+03 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.243085e+03 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (gg_ttggg is not supported on hip #933) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 381d54d555..2a956cd657 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,21 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. 
- +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg + +make USEBUILDDIR=1 BACKEND=cuda + +make USEBUILDDIR=1 BACKEND=cppnone + + +make USEBUILDDIR=1 BACKEND=cppsse4 + +make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-09-18_22:11:08 +DATE: 2024-09-18_15:14:59 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -29,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 62.6606s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3995s - [COUNTERS] Fortran MEs ( 1 ) : 62.2611s for 8192 events => throughput is 1.32E+02 events/s + [COUNTERS] PROGRAM TOTAL : 100.8383s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5370s + [COUNTERS] Fortran MEs ( 1 ) : 100.3014s for 8192 events => throughput is 8.17E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** 
-------------------- @@ -54,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 62.7300s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4148s - [COUNTERS] Fortran MEs ( 1 ) : 62.3153s for 8192 events => throughput is 1.31E+02 events/s + [COUNTERS] PROGRAM TOTAL : 100.6800s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5304s + [COUNTERS] Fortran MEs ( 1 ) : 100.1496s for 8192 events => throughput is 8.18E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -79,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858527333038E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 626.2858s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1968s - [COUNTERS] Fortran MEs ( 1 ) : 623.0890s for 81920 events => throughput is 1.31E+02 events/s + [COUNTERS] PROGRAM TOTAL : 1006.5135s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5331s + [COUNTERS] Fortran MEs ( 1 ) : 1001.9804s for 81920 events => throughput is 8.18E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -104,25 +124,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575308139230432E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575849446922190E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 101.5057s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4418s - [COUNTERS] CudaCpp MEs ( 2 ) : 100.8607s for 8192 events => throughput is 8.12E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2031s + [COUNTERS] PROGRAM TOTAL : 113.7634s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5261s + [COUNTERS] CudaCpp MEs ( 2 ) : 113.0501s for 8192 events => throughput is 7.25E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1871s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3575308139230432E-007) differ by less than 4E-4 (0.0001395002856556804) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575849446922190E-007) differ by less than 4E-4 (0.00013947977747852391) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -140,39 +160,39 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2846099389242361E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.285e-07 [2.2845954405861011E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1010.3499s - [COUNTERS] Fortran Overhead ( 0 ) : 3.2645s - [COUNTERS] CudaCpp MEs ( 2 ) : 1006.8322s for 81920 events => throughput is 8.14E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2532s + [COUNTERS] PROGRAM TOTAL : 1135.0851s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4478s + [COUNTERS] CudaCpp MEs ( 2 ) : 1130.4514s for 81920 events => throughput is 7.25E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1858s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2846099389242361E-007) differ by less than 4E-4 (0.00014187637267237818) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845954405861011E-007) differ by less than 4E-4 (0.00014189602657355138) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.652228e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.611057e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.705470e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.618948e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -186,25 +206,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575303913232094E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575845178322101E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 23.5925s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4382s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.0566s for 8192 events => throughput is 3.55E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0978s + [COUNTERS] PROGRAM TOTAL : 28.2816s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5245s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.7095s for 8192 events => throughput is 2.96E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0476s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3575303913232094E-007) differ by less than 4E-4 (0.00013932100537483727) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845178322101E-007) differ by less than 4E-4 (0.0001392986940575991) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -222,39 +242,39 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2846096068245575E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.285e-07 [2.2845949484525033E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 233.7583s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1877s - [COUNTERS] CudaCpp MEs ( 2 ) : 230.5373s for 81920 events => throughput is 3.55E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0333s + [COUNTERS] PROGRAM TOTAL : 283.1165s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4533s + [COUNTERS] CudaCpp MEs ( 2 ) : 278.6156s for 81920 events => throughput is 2.94E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0477s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2846096068245575E-007) differ by less than 4E-4 (0.00014173098820635666) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845949484525033E-007) differ by less than 4E-4 (0.00014168058211416756) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.323061e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.409666e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.315479e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.406819e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -268,25 +288,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575304434295576E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 11.7038s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4244s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.2050s for 8192 events => throughput is 7.31E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0744s + [COUNTERS] PROGRAM TOTAL : 14.8123s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5276s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.2607s for 8192 events => throughput is 5.74E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0240s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3575304434295576E-007) differ by less than 4E-4 (0.0001393431105436438) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845169411084E-007) differ by less than 4E-4 (0.0001392983160326544) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -304,46 +324,314 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2846087407964351E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 117.0635s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1472s - [COUNTERS] CudaCpp MEs ( 2 ) : 113.8984s for 81920 events => throughput is 7.19E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0179s + [COUNTERS] PROGRAM TOTAL : 146.8484s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4408s + [COUNTERS] CudaCpp MEs ( 2 ) : 142.3838s for 81920 events => throughput is 5.75E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0238s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2846087407964351E-007) differ by less than 4E-4 (0.00014135186397323807) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007) differ by less than 4E-4 (0.0001412980864952118) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.316164e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.847317e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.876249e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 13.2590s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5224s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.7153s for 8192 events => throughput is 6.44E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0212s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845169411084E-007) differ by less than 4E-4 (0.0001392983160326544) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 130.9182s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4397s + [COUNTERS] CudaCpp MEs ( 2 ) : 126.4573s for 81920 events => throughput is 6.48E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0212s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007) differ by less than 4E-4 (0.0001412980864952118) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.748756e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.729474e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.358e-07 [2.3575850859831750E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 13.3128s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5317s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.7560s for 8192 events => throughput is 6.42E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0251s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575850859831750E-007) differ by less than 4E-4 (0.00013953971621538663) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.285e-07 [2.2845946568145136E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 131.9849s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4720s + [COUNTERS] CudaCpp MEs ( 2 ) : 127.4891s for 81920 events => throughput is 6.43E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0238s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845946568145136E-007) differ by less than 4E-4 (0.00014155290989403824) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.915808e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.950771e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.934421e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.358e-07 [2.3575862304433055E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 2.1536s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0553s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5409s for 8192 events => throughput is 1.51E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5574s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3575862304433055E-007) differ by less than 4E-4 (0.00014002522141920437) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.285e-07 [2.2845959888250639E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 10.9466s + [COUNTERS] Fortran Overhead ( 0 ) : 5.0484s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.3690s for 81920 events => throughput is 1.53E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5292s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2845959888250639E-007) differ by less than 4E-4 (0.0001421360326359089) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.538905e+04 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.534050e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.137147e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.187870e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.144301e+04 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.161388e+04 ) sec^-1 -*** (2-512z) WARNING! 
SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.156097e+04 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.971114e+03 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (gg_ttggg is not supported on hip #933) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 22716a3d50..e04ca3f869 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,21 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. 
- +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg + + +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone + +make USEBUILDDIR=1 BACKEND=cppsse4 + + +make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-09-18_22:52:26 +DATE: 2024-09-18_16:14:25 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -29,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 62.5715s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3992s - [COUNTERS] Fortran MEs ( 1 ) : 62.1724s for 8192 events => throughput is 1.32E+02 events/s + [COUNTERS] PROGRAM TOTAL : 101.0730s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5330s + [COUNTERS] Fortran MEs ( 1 ) : 100.5400s for 8192 events => throughput is 8.15E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** 
-------------------- @@ -54,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 62.4809s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4131s - [COUNTERS] Fortran MEs ( 1 ) : 62.0678s for 8192 events => throughput is 1.32E+02 events/s + [COUNTERS] PROGRAM TOTAL : 100.8963s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5347s + [COUNTERS] Fortran MEs ( 1 ) : 100.3617s for 8192 events => throughput is 8.16E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -79,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858527333038E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 625.4473s - [COUNTERS] Fortran Overhead ( 0 ) : 3.2009s - [COUNTERS] Fortran MEs ( 1 ) : 622.2463s for 81920 events => throughput is 1.32E+02 events/s + [COUNTERS] PROGRAM TOTAL : 1008.5494s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5481s + [COUNTERS] Fortran MEs ( 1 ) : 1004.0013s for 81920 events => throughput is 8.16E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -104,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019963403161E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561678995975E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 98.0342s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4335s - [COUNTERS] CudaCpp MEs ( 2 ) : 97.3857s for 8192 events => throughput is 8.41E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2150s + [COUNTERS] PROGRAM TOTAL : 119.7272s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5266s + [COUNTERS] CudaCpp MEs ( 2 ) : 118.9906s for 8192 events => throughput is 6.88E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2100s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019963403161E-007) differ by less than 2E-4 (5.416306958494488e-09) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561678995975E-007) differ by less than 2E-4 (5.417890580616813e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -139,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858650293213E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842713238614534E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 983.7723s - [COUNTERS] Fortran Overhead ( 0 ) : 3.2081s - [COUNTERS] CudaCpp MEs ( 2 ) : 980.4121s for 81920 events => throughput is 8.36E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1521s + [COUNTERS] PROGRAM TOTAL : 1235.8333s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4485s + [COUNTERS] CudaCpp MEs ( 2 ) : 1231.1755s for 81920 events => throughput is 6.65E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2092s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858650293213E-007) differ by less than 2E-4 (5.3828717039294816e-09) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713238614534E-007) differ by less than 2E-4 (5.38380851011766e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.065759e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.603593e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.051001e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.563954e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -184,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019985761424E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561701257335E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 47.7625s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4830s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.2064s for 8192 events => throughput is 1.74E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0730s + [COUNTERS] PROGRAM TOTAL : 64.0094s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5236s + [COUNTERS] CudaCpp MEs ( 2 ) : 63.3817s for 8192 events => throughput is 1.29E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1041s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019985761424E-007) differ by less than 2E-4 (6.364815563486559e-09) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561701257335E-007) differ by less than 2E-4 (6.3622664914220195e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -219,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858654239918E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842713242471448E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 475.7938s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1594s - [COUNTERS] CudaCpp MEs ( 2 ) : 472.5616s for 81920 events => throughput is 1.73E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0729s + [COUNTERS] PROGRAM TOTAL : 636.9448s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4347s + [COUNTERS] CudaCpp MEs ( 2 ) : 632.4070s for 81920 events => throughput is 1.30E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1031s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858654239918E-007) differ by less than 2E-4 (5.555647941690722e-09) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713242471448E-007) differ by less than 2E-4 (5.552655002460938e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.177595e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.548168e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.205740e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.548990e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -264,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019990398792E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 22.3346s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4024s - [COUNTERS] CudaCpp MEs ( 2 ) : 21.8532s for 8192 events => throughput is 3.75E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0790s + [COUNTERS] PROGRAM TOTAL : 28.2427s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5259s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.6712s for 8192 events => throughput is 2.96E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0456s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019990398792E-007) differ by less than 2E-4 (6.5615473054947415e-09) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -299,45 +319,309 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858652988808E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 219.1677s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1155s - [COUNTERS] CudaCpp MEs ( 2 ) : 216.0189s for 81920 events => throughput is 3.79E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0333s + [COUNTERS] PROGRAM TOTAL : 285.3174s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4390s + [COUNTERS] CudaCpp MEs ( 2 ) : 280.8328s for 81920 events => throughput is 2.92E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0456s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858652988808E-007) differ by less than 2E-4 (5.500877753306099e-09) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.927048e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.559056e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.558709e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 25.3100s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5275s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.7433s for 8192 events => throughput is 3.31E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0391s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 250.0775s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4353s + [COUNTERS] CudaCpp MEs ( 2 ) : 245.6035s for 81920 events => throughput is 3.34E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0387s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.143810e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.119192e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 25.7695s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5251s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.1974s for 8192 events => throughput is 3.25E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0470s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 262.3166s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4708s + [COUNTERS] CudaCpp MEs ( 2 ) : 257.7997s for 81920 events => throughput is 3.18E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0461s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.519965e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.877390e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.518227e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561518129465E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 2.7761s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0249s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8762s for 8192 events => throughput is 9.35E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8750s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561518129465E-007) differ by less than 2E-4 (1.4064212017217415e-09) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713109538129E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 14.4588s + [COUNTERS] Fortran Overhead ( 0 ) : 4.9583s + [COUNTERS] CudaCpp MEs ( 2 ) : 8.6324s for 81920 events => throughput is 9.49E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8681s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713109538129E-007) differ by less than 2E-4 (2.668514298420632e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.423002e+03 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.078690e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.104813e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.152942e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.106947e+04 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.110409e+04 ) sec^-1 -*** (2-512z) WARNING! 
SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.106917e+04 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.676393e+03 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (gg_ttggg is not supported on hip #933) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 658b9ffa5c..13fa996bcb 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=hip - -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 BACKEND=cppnone + +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-18_21:22:04 +DATE: 2024-09-18_13:55:45 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.5014s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4508s - [COUNTERS] Fortran MEs ( 1 ) : 0.0507s for 8192 events => throughput is 1.62E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5290s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4573s + [COUNTERS] Fortran MEs ( 1 ) : 0.0716s for 8192 events => throughput is 1.14E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3734s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3202s - [COUNTERS] Fortran MEs ( 1 ) : 0.0532s for 8192 events => throughput is 1.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4789s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4070s + [COUNTERS] Fortran MEs ( 1 ) : 0.0719s for 8192 events => throughput is 1.14E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=0 + [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.9115s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3930s - [COUNTERS] Fortran MEs ( 1 ) : 0.5185s for 81920 events => throughput is 1.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5996s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8818s + [COUNTERS] Fortran MEs ( 1 ) : 0.7178s for 81920 events => throughput is 1.14E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504505737132] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3811s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3191s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0617s for 8192 events => throughput is 1.33E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4885s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4104s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701704456871) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737132) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376575784] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.0389s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4071s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6315s for 81920 events => throughput is 1.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.6882s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9084s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7791s for 81920 events => throughput is 1.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376575784) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427598) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.323639e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.061513e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.328591e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.071810e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456874] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504505737170] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3556s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3197s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0356s for 8192 events => throughput is 2.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4501s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4063s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0432s for 8192 events => throughput is 1.90E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701704456874) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737170) differ by less than 3E-14 (2.220446049250313e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842877427590] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.7525s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3982s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3540s for 81920 events => throughput is 2.31E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.3263s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8964s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4293s for 81920 events => throughput is 1.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376575781) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427590) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.273310e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.907415e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.290081e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.918851e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3485s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3287s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0195s for 8192 events => throughput is 4.19E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4325s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701704456871) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,120 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376575775] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.6011s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4113s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1894s for 81920 events => throughput is 4.32E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.1464s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8971s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2488s for 81920 events => throughput is 3.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376575775) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.474030e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.325644e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.484493e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.338006e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4312s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0217s for 8192 events => throughput is 3.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.1356s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9099s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2252s for 81920 events => throughput is 3.64E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.464520e+05 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.541284e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,32 +444,184 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' -Memory access fault by GPU node-4 (Agent handle: 0x677d050) on address 0x1493b7ab5000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x14964ec162e2 in ??? -#1 0x14964ec15475 in ??? -#2 0x14964ebe590f in ??? -#3 0x14964e886d2b in ??? -#4 0x14964e8883e4 in ??? -#5 0x149644aeed1b in ??? -#6 0x149644ae8bc8 in ??? -#7 0x149644a9a9e6 in ??? -#8 0x14964ebd96e9 in ??? -#9 0x14964e95450e in ??? -#10 0xffffffffffffffff in ??? -./madX.sh: line 400: 111168 Aborted (core dumped) $timecmd $cmd < ${tmpin} > ${tmp} -ERROR! ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed - PDF set = nn23lo1 - alpha_s(Mz)= 0.1300 running at 2 loops. - alpha_s(Mz)= 0.1300 running at 2 loops. 
- Renormalization scale set on event-by-event basis - Factorization scale set on event-by-event basis - - - getting user params -Enter number of events and max and min iterations: - Number of events and iterations 8192 1 1 +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4489s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4137s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0347s for 8192 events => throughput is 2.36E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.2441s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9069s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3367s for 81920 events => throughput is 2.43E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.393978e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.414185e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313504505737173] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.8542s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8498s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.55E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20313504505737126) and cuda (0.20313504505737173) differ by less than 3E-14 (2.220446049250313e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.3403s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3289s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0103s for 81920 events => throughput is 7.96E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cuda (0.21095842877427598) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.881632e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.254031e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.277303e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.101959e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.239939e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.286389e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.254121e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.640364e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 1fc413ce73..0c2abc603a 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-18_21:22:17 +DATE: 2024-09-18_13:56:17 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.4108s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3577s - [COUNTERS] Fortran MEs ( 1 ) : 0.0532s for 8192 events => throughput is 1.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5308s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4584s + [COUNTERS] Fortran MEs ( 1 ) : 0.0725s for 8192 events => throughput is 1.13E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3742s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3210s - [COUNTERS] Fortran MEs ( 1 ) : 0.0532s for 8192 events => throughput is 1.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4871s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4149s + [COUNTERS] Fortran MEs ( 1 ) : 0.0721s for 8192 events => throughput is 1.14E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=0 + [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.9319s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4022s - [COUNTERS] Fortran MEs ( 1 ) : 0.5297s for 81920 events => throughput is 1.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6180s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8993s + [COUNTERS] Fortran MEs ( 1 ) : 0.7187s for 81920 events => throughput is 1.14E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313702859087712] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313506133732837] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3759s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3186s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0570s for 8192 events => throughput is 1.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4837s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4096s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0735s for 8192 events => throughput is 1.11E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313702859087712) differ by less than 4E-4 (5.6840001816382824e-08) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313506133732837) differ by less than 4E-4 (8.014351782215101e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095770771365008] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842907143103] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.2457s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6017s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6436s for 81920 events => throughput is 1.27E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.6552s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9225s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7321s for 81920 events => throughput is 1.12E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095770771365008) differ by less than 4E-4 (2.86887245071199e-08) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842907143103) differ by less than 4E-4 (1.4085954624931674e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.523849e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.127783e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.477916e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.135487e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313700465139972] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313502997679400] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3480s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3260s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0219s for 8192 events => throughput is 3.75E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4362s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4092s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 8192 events => throughput is 3.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313700465139972) differ by less than 4E-4 (6.100891492000216e-08) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502997679400) differ by less than 4E-4 (7.423917058879681e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095768752291760] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095839656505114] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.6249s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4079s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2168s for 81920 events => throughput is 3.78E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.1752s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9071s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2678s for 81920 events => throughput is 3.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095768752291760) differ by less than 4E-4 (1.2439858076973564e-07) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839656505114) differ by less than 4E-4 (1.5268043562777223e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.985922e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.031782e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.995161e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.033396e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313700354235445] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3351s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3241s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0109s for 8192 events => throughput is 7.55E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4208s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4072s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313700354235445) differ by less than 4E-4 (6.646850714275843e-08) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502619857851) differ by less than 4E-4 (9.283869628617936e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095768538537163] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.5138s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4058s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1078s for 81920 events => throughput is 7.60E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.0338s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8997s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1338s for 81920 events => throughput is 6.12E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095768538537163) differ by less than 4E-4 (1.3453116110007102e-07) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ by less than 4E-4 (1.6423004467469582e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.115197e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.165970e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.197191e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4217s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4091s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.65E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502619857851) differ by less than 4E-4 (9.283869628617936e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.0322s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9087s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1232s for 81920 events => throughput is 6.65E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ by less than 4E-4 (1.6423004467469582e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.618658e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.940855e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.353403e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313505300145301] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4238s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4063s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313505300145301) differ by less than 4E-4 (3.910739154733278e-08) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842133012335] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.0846s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9123s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1719s for 81920 events => throughput is 4.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842133012335) differ by less than 4E-4 (3.528729641821826e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.731547e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.814682e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,32 +524,104 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! 
Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' -Memory access fault by GPU node-4 (Agent handle: 0x677d070) on address 0x14fd9c0ff000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x15003322b2e2 in ??? -#1 0x15003322a475 in ??? -#2 0x1500331fa90f in ??? -#3 0x150032e9bd2b in ??? -#4 0x150032e9d3e4 in ??? -#5 0x150029103d1b in ??? -#6 0x1500290fdbc8 in ??? -#7 0x1500290af9e6 in ??? -#8 0x1500331ee6e9 in ??? -#9 0x150032f6950e in ??? -#10 0xffffffffffffffff in ??? -./madX.sh: line 400: 112178 Aborted $timecmd $cmd < ${tmpin} > ${tmp} -ERROR! ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed - PDF set = nn23lo1 - alpha_s(Mz)= 0.1300 running at 2 loops. - alpha_s(Mz)= 0.1300 running at 2 loops. 
- Renormalization scale set on event-by-event basis - Factorization scale set on event-by-event basis - - - getting user params -Enter number of events and max and min iterations: - Number of events and iterations 8192 1 1 +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313508590887899] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.8504s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8464s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20313504505737126) and cuda (0.20313508590887899) differ by less than 4E-4 (2.011051698502797e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095846337765808] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.3852s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3752s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 8.98E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cuda (0.21095846337765808) differ by less than 4E-4 (1.640293887383848e-07) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.088649e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.406235e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.833815e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.147443e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.839780e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.591152e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.570167e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.189550e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 9dcc27eea4..d3b173c725 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-18_21:22:30 +DATE: 2024-09-18_13:56:48 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.4947s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4413s - [COUNTERS] Fortran MEs ( 1 ) : 0.0534s for 8192 events => throughput is 1.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5250s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4531s + [COUNTERS] Fortran MEs ( 1 ) : 0.0719s for 8192 events => throughput is 1.14E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3757s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3225s - [COUNTERS] Fortran MEs ( 1 ) : 0.0532s for 8192 events => throughput is 1.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4812s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s + [COUNTERS] Fortran MEs ( 1 ) : 0.0722s for 8192 events => throughput is 1.13E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=0 + [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.9472s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4177s - [COUNTERS] Fortran MEs ( 1 ) : 0.5295s for 81920 events => throughput is 1.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.6071s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8898s + [COUNTERS] Fortran MEs ( 1 ) : 0.7173s for 81920 events => throughput is 1.14E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701694845307] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504495344831] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3887s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3249s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0634s for 8192 events => throughput is 1.29E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4867s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4094s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0766s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701694845307) differ by less than 2E-4 (4.731567360138911e-10) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504495344831) differ by less than 2E-4 (5.115954326839756e-10) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376532396] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.0464s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4147s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6312s for 81920 events => throughput is 1.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 2.6750s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9006s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7737s for 81920 events => throughput is 1.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376532396) differ by less than 2E-4 (2.05657713081564e-12) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ by less than 2E-4 (3.982036922423049e-12) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.323725e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.066197e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.327049e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.081547e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701694845307] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504495344833] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3610s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3245s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0361s for 8192 events => throughput is 2.27E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4594s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4139s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0450s for 8192 events => throughput is 1.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701694845307) differ by less than 2E-4 (4.731567360138911e-10) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504495344833) differ by less than 2E-4 (5.115952106393706e-10) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376532396] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.7706s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4098s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3605s for 81920 events => throughput is 2.27E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.3287s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9003s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4278s for 81920 events => throughput is 1.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376532396) differ by less than 2E-4 (2.05657713081564e-12) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ by less than 2E-4 (3.982036922423049e-12) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.284786e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.915335e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.391017e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.909065e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701710149187] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3417s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3230s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 8192 events => throughput is 4.43E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4419s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4163s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 8192 events => throughput is 3.26E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701710149187) differ by less than 2E-4 (2.8022051345999444e-10) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771374576316] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.5974s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4135s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1836s for 81920 events => throughput is 4.46E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.1482s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9012s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2465s for 81920 events => throughput is 3.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771374576316) differ by less than 2E-4 (9.478029472376193e-11) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.538844e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.313762e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.558820e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.370922e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4314s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4086s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0223s for 8192 events => throughput is 3.67E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.1188s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9005s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2179s for 81920 events => throughput is 3.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.747505e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.784395e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4497s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4136s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0355s for 8192 events => throughput is 2.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.2638s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9140s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3492s for 81920 events => throughput is 2.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.324769e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.345529e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,32 +524,104 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' -Memory access fault by GPU node-4 (Agent handle: 0x677d050) on address 0x1504b7d35000. Reason: Unknown. - -Program received signal SIGABRT: Process abort signal. - -Backtrace for this error: -#0 0x15074eea52e2 in ??? -#1 0x15074eea4475 in ??? -#2 0x15074ee7490f in ??? -#3 0x15074eb15d2b in ??? -#4 0x15074eb173e4 in ??? -#5 0x150744d7dd1b in ??? -#6 0x150744d77bc8 in ??? -#7 0x150744d299e6 in ??? -#8 0x15074ee686e9 in ??? -#9 0x15074ebe350e in ??? -#10 0xffffffffffffffff in ??? -./madX.sh: line 400: 113249 Aborted $timecmd $cmd < ${tmpin} > ${tmp} -ERROR! ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' failed - PDF set = nn23lo1 - alpha_s(Mz)= 0.1300 running at 2 loops. - alpha_s(Mz)= 0.1300 running at 2 loops. 
- Renormalization scale set on event-by-event basis - Factorization scale set on event-by-event basis - - - getting user params -Enter number of events and max and min iterations: - Number of events and iterations 8192 1 1 +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313504512110778] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.8545s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8502s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20313504505737126) and cuda (0.20313504512110778) differ by less than 2E-4 (3.1376434783680907e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842873460982] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.3685s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3565s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 81920 events => throughput is 7.58E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cuda (0.21095842873460982) differ by less than 2E-4 (1.8802814860663375e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.938068e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.128844e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.264635e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.049713e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.259722e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.250826e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.279255e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GU_TTXU_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.647946e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 3dabe0755c..fad5d1a64f 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-18_23:40:16 +DATE: 2024-09-18_17:32:01 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 1.1585s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1221s - [COUNTERS] Fortran MEs ( 1 ) : 0.0364s for 8192 events => throughput is 2.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9760s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9274s + [COUNTERS] Fortran MEs ( 1 ) : 0.0485s for 8192 events => throughput is 1.69E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.7186s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6822s - [COUNTERS] Fortran MEs ( 1 ) : 0.0364s for 8192 events => throughput is 2.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4561s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4081s + [COUNTERS] Fortran MEs ( 1 ) : 0.0480s for 8192 events => throughput is 1.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=0 + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 5.2530s - [COUNTERS] Fortran Overhead ( 0 ) : 4.8813s - [COUNTERS] Fortran MEs ( 1 ) : 0.3717s for 81920 events => throughput is 2.20E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0599s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5836s + [COUNTERS] Fortran MEs ( 1 ) : 0.4763s for 81920 events => throughput is 1.72E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755334] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755170] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.7307s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6898s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4589s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4083s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0501s for 8192 events => throughput is 1.64E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081479755334) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755170) differ by less than 3E-14 (6.661338147750939e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865325] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 5.0375s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6333s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4039s for 81920 events => throughput is 2.03E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.0776s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5685s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5087s for 81920 events => throughput is 1.61E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713375865325) differ by less than 3E-14 (1.1102230246251565e-14) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.067860e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.656917e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.073882e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.653176e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755347] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.7076s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6838s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0235s for 8192 events => throughput is 3.48E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4352s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4071s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0277s for 8192 events => throughput is 2.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081479755347) differ by less than 3E-14 (8.881784197001252e-16) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755183) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865338] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.8916s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6566s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2348s for 81920 events => throughput is 3.49E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8492s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5718s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2771s for 81920 events => throughput is 2.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713375865338) differ by less than 3E-14 (1.0436096431476471e-14) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.562430e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.911436e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.577295e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.988061e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755325] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.7106s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6974s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.31E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4258s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4084s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0170s for 8192 events => throughput is 4.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081479755325) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755165) differ by less than 3E-14 (8.881784197001252e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.7585s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6314s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1269s for 81920 events => throughput is 6.46E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7395s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5731s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1660s for 81920 events => throughput is 4.93E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713375865552) differ by less than 3E-14 (0.0) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ by less than 3E-14 (9.325873406851315e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.541810e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.731125e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.942521e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.4230s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4072s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755165) differ by less than 3E-14 (8.881784197001252e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.7429s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5869s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1556s for 81920 events => throughput is 5.26E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ by less than 3E-14 (9.325873406851315e-15) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.403364e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.596517e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.439647e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0160081479755179] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.4355s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4114s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.46E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755179) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.8196s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5847s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2345s for 81920 events => throughput is 3.49E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.483959e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.537527e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755356] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755192] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.9989s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9846s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0078s + [COUNTERS] PROGRAM TOTAL : 0.8564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8526s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and hip (2.0160081479755356) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081479755192) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865352] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865294] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 5.0429s - [COUNTERS] Fortran Overhead ( 0 ) : 5.0165s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0192s for 81920 events => throughput is 4.27E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0072s + [COUNTERS] PROGRAM TOTAL : 2.0272s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0173s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 9.01E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and hip (2.0336713375865352) differ by less than 3E-14 (9.880984919163893e-15) +OK! xsec from fortran (2.0336713375865285) and cuda (2.0336713375865294) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.356738e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.829708e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.379534e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.382767e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.584016e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.774663e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.300114e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.124992e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 
17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.592842e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.755835e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.330570e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.430950e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.584311e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.756916e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.664638e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.513302e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index c022668690..4984f73b96 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make 
USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-18_23:40:55 +DATE: 2024-09-18_17:32:29 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 1.0731s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0367s - [COUNTERS] Fortran MEs ( 1 ) : 0.0364s for 8192 events => throughput is 2.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9597s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9125s + [COUNTERS] Fortran MEs ( 1 ) : 0.0473s for 8192 events => throughput is 1.73E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.7254s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6890s - [COUNTERS] Fortran MEs ( 1 ) : 0.0364s for 8192 events => throughput is 2.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4554s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4080s + [COUNTERS] Fortran MEs ( 1 ) : 0.0474s for 8192 events => throughput is 1.73E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=0 + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 5.0374s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6751s - [COUNTERS] Fortran MEs ( 1 ) : 0.3623s for 81920 events => throughput is 2.26E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0424s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5675s + [COUNTERS] Fortran MEs ( 1 ) : 0.4749s for 81920 events => throughput is 1.72E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,34 +124,34 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160406546722180] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160406825242951] fbridge_mode=1 [UNWEIGHT] Wrote 1653 events (found 1658 events) - [COUNTERS] PROGRAM TOTAL : 0.7172s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6812s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0358s for 8192 events => throughput is 2.29E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4547s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0470s for 8192 events => throughput is 1.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160406546722180) differ by less than 4E-4 (1.61242883456314e-05) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160406825242951) differ by less than 4E-4 (1.6138103811513815e-05) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! 
-diff /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 +diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 7562,7575d7561 < 4 1 1E-03 0.1250010E+03 0.7546771E-02 0.1235066E+00 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.71320499550E+02 0.71320499550E+02 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.54771239731E+02 0.54771239731E+02 0.00000000000E+00 0. 1. -< 5 1 1 2 501 0 0.50303102232E+02 0.36190119942E+02 0.14973002962E+02 0.63925016178E+02 0.47000000000E+01 0. -1. -< -5 1 1 2 0 501 -0.50303102232E+02 -0.36190119942E+02 0.15762568567E+01 0.62166723103E+02 0.47000000000E+01 0. -1. +< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.71320499473E+02 0.71320499473E+02 0.00000000000E+00 0. 1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.54771239790E+02 0.54771239790E+02 0.00000000000E+00 0. 1. +< 5 1 1 2 501 0 0.50303102232E+02 0.36190119942E+02 0.14973002893E+02 0.63925016162E+02 0.47000000000E+01 0. -1. +< -5 1 1 2 0 501 -0.50303102232E+02 -0.36190119942E+02 0.15762567893E+01 0.62166723101E+02 0.47000000000E+01 0. -1. 
< < 0 0.12500099E+03 < 0 diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 088580d866..e45c8953e0 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-18_23:41:04 +DATE: 2024-09-18_17:32:35 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 1.0767s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0402s - [COUNTERS] Fortran MEs ( 1 ) : 0.0365s for 8192 events => throughput is 2.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9574s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9096s + [COUNTERS] Fortran MEs ( 1 ) : 0.0478s for 8192 events => throughput is 1.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.7095s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6731s - [COUNTERS] Fortran MEs ( 1 ) : 0.0364s for 8192 events => throughput is 2.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4551s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4079s + [COUNTERS] Fortran MEs ( 1 ) : 0.0472s for 8192 events => throughput is 1.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=0 + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 5.0318s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6691s - [COUNTERS] Fortran MEs ( 1 ) : 0.3627s for 81920 events => throughput is 2.26E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0377s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5636s + [COUNTERS] Fortran MEs ( 1 ) : 0.4741s for 81920 events => throughput is 1.73E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,25 +124,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081964453460] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081964453331] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.7211s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6802s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4555s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0511s for 8192 events => throughput is 1.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081964453460) differ by less than 2E-4 (2.4042468904639236e-08) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081964453331) differ by less than 2E-4 (2.4042469792817656e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,25 +160,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713843200616] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713843200420] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 5.0505s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6453s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4049s for 81920 events => throughput is 2.02E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.0631s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5575s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5052s for 81920 events => throughput is 1.62E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713843200616) differ by less than 2E-4 (2.297987178323524e-08) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713843200420) differ by less than 2E-4 (2.2979875113904313e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -187,15 +187,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.031830e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.544125e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.034344e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.558149e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,25 +209,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081964453469] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081964453336] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.6983s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6744s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.46E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4342s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4061s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0277s for 8192 events => throughput is 2.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081964453469) differ by less than 2E-4 (2.4042469348728446e-08) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081964453336) differ by less than 2E-4 (2.404247001486226e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -245,25 +245,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713843200620] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713843200425] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 5.1226s - [COUNTERS] Fortran Overhead ( 0 ) : 4.8775s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2449s for 81920 events => throughput is 3.35E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8495s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5723s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2768s for 81920 events => throughput is 2.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713843200620) differ by less than 2E-4 (2.2979872005279844e-08) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713843200425) differ by less than 2E-4 (2.2979875335948918e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -272,15 +272,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.527175e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.837690e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.537960e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.886878e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,25 +294,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081962974865] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.6901s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6772s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0126s for 8192 events => throughput is 6.48E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4268s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0174s for 8192 events => throughput is 4.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081962974865) differ by less than 2E-4 (2.3969126017320264e-08) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962974745) differ by less than 2E-4 (2.3969127349587893e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -330,25 +330,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713836598834] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.7431s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6181s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1248s for 81920 events => throughput is 6.57E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7266s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5577s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1685s for 81920 events => throughput is 4.86E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713836598834) differ by less than 2E-4 (2.2655247899905362e-08) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598665) differ by less than 2E-4 (2.265525278488667e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -357,23 +357,187 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.727444e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.779100e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.788127e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.4240s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0156s for 8192 events => throughput is 5.24E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962974745) differ by less than 2E-4 (2.3969127349587893e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.7202s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5626s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1572s for 81920 events => throughput is 5.21E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598665) differ by less than 2E-4 (2.265525278488667e-08) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.210846e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.761728e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.166322e+05 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0160081962970020] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.4371s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4123s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962970020) differ by less than 2E-4 (2.3968893092529697e-08) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.034 [2.0336713836598515] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.8082s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5683s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2395s for 81920 events => throughput is 3.42E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598515) differ by less than 2E-4 (2.2655245235370103e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.192275e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.206869e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -385,30 +549,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081483021464] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081483021330] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 1.0475s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0334s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 8192 events => throughput is 1.26E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0077s + [COUNTERS] PROGRAM TOTAL : 0.8528s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8489s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and hip (2.0160081483021464) differ by less than 2E-4 (1.6200996100224074e-10) +OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081483021330) differ by less than 2E-4 (1.6201062713605552e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -420,67 +584,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713380111582] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713380111449] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.9959s - [COUNTERS] Fortran Overhead ( 0 ) : 4.9691s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0192s for 81920 events => throughput is 4.27E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0076s + [COUNTERS] PROGRAM TOTAL : 2.0087s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9990s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 81920 events => throughput is 9.13E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and hip (2.0336713380111582) differ by less than 2E-4 (2.0878654360956261e-10) +OK! 
xsec from fortran (2.0336713375865285) and cuda (2.0336713380111449) differ by less than 2E-4 (2.0879298290310544e-10) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.361816e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.939022e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.392956e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.244768e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.577296e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.749489e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.308106e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.094535e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.595061e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.768020e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.923460e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.372316e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.570017e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.761406e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.696241e+06 ) 
sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.486762e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index e6b5f839f6..cf925a09c6 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-18_23:43:33 +DATE: 2024-09-18_17:35:48 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 1.9139s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3589s - [COUNTERS] Fortran MEs ( 1 ) : 1.5550s for 8192 events => throughput is 5.27E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6643s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3661s + [COUNTERS] Fortran MEs ( 1 ) : 2.2982s for 8192 events => throughput is 3.56E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.8679s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2870s - [COUNTERS] Fortran MEs ( 1 ) : 1.5810s for 8192 events => throughput is 5.18E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6579s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3588s + [COUNTERS] Fortran MEs ( 1 ) : 2.2991s for 8192 events => throughput is 3.56E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 17.4649s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4800s - [COUNTERS] Fortran MEs ( 1 ) : 15.9850s for 81920 events => throughput is 5.12E+03 events/s + [COUNTERS] PROGRAM TOTAL : 25.0583s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0507s + [COUNTERS] Fortran MEs ( 1 ) : 23.0076s for 81920 events => throughput is 3.56E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728557E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.1861s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3259s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8564s for 8192 events => throughput is 4.41E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0037s + [COUNTERS] PROGRAM TOTAL : 2.8350s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3611s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4687s for 8192 events => throughput is 3.32E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0053s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381610362728557E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898222E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 20.0082s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4968s - [COUNTERS] CudaCpp MEs ( 2 ) : 18.5079s for 81920 events => throughput is 4.43E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0035s + [COUNTERS] PROGRAM TOTAL : 26.8636s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0489s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.8095s for 81920 events => throughput is 3.30E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542926582898222E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898148E-007) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.607657e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.460402e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.561572e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.457448e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728610E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.5197s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3171s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2005s for 8192 events => throughput is 6.82E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s + [COUNTERS] PROGRAM TOTAL : 1.6570s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3616s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2927s for 8192 events => throughput is 6.34E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381610362728536E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728610E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898275E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898191E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 11.0354s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4918s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5415s for 81920 events => throughput is 8.59E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s + [COUNTERS] PROGRAM TOTAL : 15.0457s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0488s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.9941s for 81920 events => throughput is 6.30E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542926582898275E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898191E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.858280e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.568599e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.830106e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.542585e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728525E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7418s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3011s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4395s for 8192 events => throughput is 1.86E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 0.9401s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3632s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5754s for 8192 events => throughput is 1.42E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381610362728525E-007) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898233E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 6.0773s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6733s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.4029s for 81920 events => throughput is 1.86E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 7.7938s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0478s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.7444s for 81920 events => throughput is 1.43E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542926582898233E-007) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.902213e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.465958e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.451297e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.8791s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3648s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5129s for 8192 events => throughput is 1.60E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 7.1685s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0408s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.1262s for 81920 events => throughput is 1.60E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.660633e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.903178e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.663487e+04 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 1.0357s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3623s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6717s for 8192 events => throughput is 1.22E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 8.7351s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0383s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.6950s for 81920 events => throughput is 1.22E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.242378e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.242909e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728514E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728578E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7605s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6421s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0651s for 8192 events => throughput is 1.26E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0533s + 
[COUNTERS] PROGRAM TOTAL : 0.8480s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.13E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0197s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and hip (7.6381610362728514E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610362728578E-007) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.5715s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9128s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6057s for 81920 events => throughput is 1.35E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0530s + [COUNTERS] PROGRAM TOTAL : 2.6736s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4774s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1765s for 81920 events => throughput is 4.64E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0197s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and hip (7.6542926582898244E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.290124e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.229187e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.275778e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.527847e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.619452e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.819324e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.435666e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.226919e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.620393e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.844216e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.610894e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.225190e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.620701e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.847840e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.954475e+04 ) sec^-1 +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.681732e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 2091e4d6ea..d625debf72 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-18_23:45:20 +DATE: 2024-09-18_17:38:08 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 1.8612s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2852s - [COUNTERS] Fortran MEs ( 1 ) : 1.5759s for 8192 events => throughput is 5.20E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6507s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3559s + [COUNTERS] Fortran MEs ( 1 ) : 2.2948s for 8192 events => throughput is 3.57E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.0803s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3796s - [COUNTERS] Fortran MEs ( 1 ) : 1.7007s for 8192 events => throughput is 4.82E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6503s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3594s + [COUNTERS] Fortran MEs ( 1 ) : 2.2909s for 8192 events => throughput is 3.58E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 17.2537s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4920s - [COUNTERS] Fortran MEs ( 1 ) : 15.7617s for 81920 events => throughput is 5.20E+03 events/s + [COUNTERS] PROGRAM TOTAL : 25.0293s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0436s + [COUNTERS] Fortran MEs ( 1 ) : 22.9857s for 81920 events => throughput is 3.56E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381684214474469E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381686438954397E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.0939s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3042s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.7855s for 8192 events => throughput is 4.59E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0042s + [COUNTERS] PROGRAM TOTAL : 2.7985s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3626s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4310s for 8192 events => throughput is 3.37E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0049s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381684214474469E-007) differ by less than 4E-4 (9.668786189465095e-07) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381686438954397E-007) differ by less than 4E-4 (9.960018576560259e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542976447681378E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542978900095690E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 19.5765s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4924s - [COUNTERS] CudaCpp MEs ( 2 ) : 18.0804s for 81920 events => throughput is 4.53E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0036s + [COUNTERS] PROGRAM TOTAL : 26.3775s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0419s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.3307s for 81920 events => throughput is 3.37E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542976447681378E-007) differ by less than 4E-4 (6.514616746056134e-07) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542978900095690E-007) differ by less than 4E-4 (6.835014008110818e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.699917e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.486852e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.716433e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.494086e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381673102586798E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381671483253128E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7975s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3062s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4895s for 8192 events => throughput is 1.67E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [COUNTERS] PROGRAM TOTAL : 1.0412s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3621s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6777s for 8192 events => throughput is 1.21E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381673102586798E-007) differ by less than 4E-4 (8.214000459805249e-07) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381671483253128E-007) differ by less than 4E-4 (8.001994753481512e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542965612263376E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542962735029303E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 6.4322s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4898s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.9413s for 81920 events => throughput is 1.66E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 8.8470s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0545s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.7910s for 81920 events => throughput is 1.21E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542965612263376E-007) differ by less than 4E-4 (5.09901657563816e-07) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542962735029303E-007) differ by less than 4E-4 (4.7231184874263477e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.671993e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.236746e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.674722e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.228135e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381674937970992E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7009s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4698s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2305s for 8192 events => throughput is 3.55E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.6595s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3662s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2926s for 8192 events => throughput is 2.80E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381674937970992E-007) differ by less than 4E-4 (8.454291831050398e-07) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381672175647812E-007) differ by less than 4E-4 (8.092644150359263e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542993199513089E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 3.7840s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4892s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2941s for 81920 events => throughput is 3.57E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 5.0070s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0356s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.9705s for 81920 events => throughput is 2.76E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542993199513089E-007) differ by less than 4E-4 (8.703170601975785e-07) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007) differ by less than 4E-4 (8.245628615455303e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.680556e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.852598e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.877017e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.6241s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3601s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2632s for 8192 events => throughput is 3.11E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381672175647812E-007) differ by less than 4E-4 (8.092644150359263e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 4.6833s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0329s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.6497s for 81920 events => throughput is 3.09E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007) differ by less than 4E-4 (8.245628615455303e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.188563e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.684569e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.189121e+04 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381686320975603E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.6974s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3592s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3373s for 8192 events => throughput is 2.43E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381686320975603E-007) differ by less than 4E-4 (9.944572607611946e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6543004237976207E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 5.4012s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0331s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.3672s for 81920 events => throughput is 2.43E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6543004237976207E-007) differ by less than 4E-4 (1.014529774634454e-06) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.455496e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.453904e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381687553340853E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381711031958629E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9693s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8677s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0479s for 8192 events => throughput is 1.71E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0538s + 
[COUNTERS] PROGRAM TOTAL : 0.8395s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8026s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0171s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and hip (7.6381687553340853E-007) differ by less than 4E-4 (1.0105915801972287e-06) +OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381711031958629E-007) differ by less than 4E-4 (1.3179773188376487e-06) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6543007309341497E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6543026921346333E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.1839s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8129s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3184s for 81920 events => throughput is 2.57E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0527s + [COUNTERS] PROGRAM TOTAL : 2.6550s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4784s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1594s for 81920 events => throughput is 5.14E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0171s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and hip (7.6543007309341497E-007) differ by less than 4E-4 (1.0546558233404113e-06) +OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6543026921346333E-007) differ by less than 4E-4 (1.3108781262705094e-06) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.348920e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.221158e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.428627e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.431078e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.015915e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.300814e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.017256e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.323922e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.015001e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.295837e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.020010e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.322906e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.013211e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.292673e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.487524e+05 ) sec^-1 +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.656202e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index a3e817e7ae..e6874f3a32 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering 
directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-18_23:46:47 +DATE: 2024-09-18_17:40:03 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 1.8701s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2841s - [COUNTERS] Fortran MEs ( 1 ) : 1.5859s for 8192 events => throughput is 5.17E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6497s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3549s + [COUNTERS] Fortran MEs ( 1 ) : 2.2948s for 8192 events => throughput is 3.57E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.8803s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2969s - [COUNTERS] Fortran MEs ( 1 ) : 1.5834s for 8192 events => throughput is 5.17E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6518s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3572s + [COUNTERS] Fortran MEs ( 1 ) : 2.2945s for 8192 events => throughput is 3.57E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 17.4312s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4913s - [COUNTERS] Fortran MEs ( 1 ) : 15.9399s for 81920 events => throughput is 5.14E+03 events/s + [COUNTERS] PROGRAM TOTAL : 25.0481s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0523s + [COUNTERS] Fortran MEs ( 1 ) : 22.9958s for 81920 events => throughput is 3.56E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608764955570E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608764955655E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.1419s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3091s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8290s for 8192 events => throughput is 4.48E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0038s + [COUNTERS] PROGRAM TOTAL : 2.8581s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3622s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4908s for 8192 events => throughput is 3.29E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381608764955570E-007) differ by less than 2E-4 (2.0918293763827478e-08) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608764955655E-007) differ by less than 2E-4 (2.0918293319738268e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542925018181723E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542925018181681E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 20.1982s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5029s - [COUNTERS] CudaCpp MEs ( 2 ) : 18.6915s for 81920 events => throughput is 4.38E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0037s + [COUNTERS] PROGRAM TOTAL : 27.0498s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0486s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.9962s for 81920 events => throughput is 3.28E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542925018181723E-007) differ by less than 2E-4 (2.0442339820903044e-08) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925018181681E-007) differ by less than 2E-4 (2.044233915476923e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.596590e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.436817e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.600471e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.435512e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608686521537E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608686521600E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.2415s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3049s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9349s for 8192 events => throughput is 8.76E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [COUNTERS] PROGRAM TOTAL : 1.6335s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3586s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2723s for 8192 events => throughput is 6.44E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381608686521537E-007) differ by less than 2E-4 (2.194516446341055e-08) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608686521600E-007) differ by less than 2E-4 (2.1945164241365944e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542924921991233E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542924921991264E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 10.8267s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5226s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.3021s for 81920 events => throughput is 8.81E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s + [COUNTERS] PROGRAM TOTAL : 14.8859s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0643s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.8189s for 81920 events => throughput is 6.39E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542924921991233E-007) differ by less than 2E-4 (2.1699026797605825e-08) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542924921991264E-007) differ by less than 2E-4 (2.1699025132271288e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.009209e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.796385e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.006276e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.784638e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608826200382E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7302s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2904s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4382s for 8192 events => throughput is 1.87E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [COUNTERS] PROGRAM TOTAL : 0.9347s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3638s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5694s for 8192 events => throughput is 1.44E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381608826200382E-007) differ by less than 2E-4 (2.0116467158715068e-08) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,120 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542925056010384E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 6.1080s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5753s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.5316s for 81920 events => throughput is 1.81E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 7.7335s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0485s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.6834s for 81920 events => throughput is 1.44E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542925056010384E-007) differ by less than 2E-4 (1.9948124929669575e-08) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.954394e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.478402e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.916441e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.467295e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.8664s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3632s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5018s for 8192 events => throughput is 1.63E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 7.0416s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0429s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.9973s for 81920 events => throughput is 1.64E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.693290e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.684165e+04 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +444,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610372590265E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7523s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6344s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0647s for 8192 events => throughput is 1.27E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0532s + 
[COUNTERS] PROGRAM TOTAL : 1.0424s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3606s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6800s for 8192 events => throughput is 1.20E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0019s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and hip (7.6381610372590265E-007) differ by less than 2E-4 (1.2911138824733825e-10) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +479,149 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926581386322E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.4759s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8191s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6025s for 81920 events => throughput is 1.36E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0543s + [COUNTERS] PROGRAM TOTAL : 8.8421s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0543s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.7861s for 81920 events => throughput is 1.21E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.218603e+04 ) sec^-1 -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.233808e+04 ) sec^-1 -OK! xsec from fortran (7.6542926582898244E-007) and hip (7.6542926581386322E-007) differ by less than 2E-4 (1.9752643964920935e-11) +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381610372590318E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.8393s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7998s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0197s for 8192 events => throughput is 4.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610372590318E-007) differ by less than 2E-4 (1.2911138824733825e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6542926581386226E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 2.6799s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4834s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1767s for 81920 events => throughput is 4.64E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926581386226E-007) differ by less than 2E-4 (1.9752643964920935e-11) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.295429e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.207155e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.278295e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.529302e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.606939e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.824963e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.436405e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.207520e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.607369e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.824989e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.609351e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.199605e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.602579e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.829686e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.957889e+04 ) sec^-1 +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.672241e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 43f1f62670..a3ffe665a4 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-18_23:42:39 +DATE: 2024-09-18_17:34:28 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.6314s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6253s - [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6936s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6849s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.41E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3272s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3218s - [COUNTERS] Fortran MEs ( 1 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4210s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4125s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.64E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.2367s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1766s - [COUNTERS] Fortran MEs ( 1 ) : 0.0601s for 81920 events => throughput is 1.36E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6464s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5624s + [COUNTERS] Fortran MEs ( 1 ) : 0.0840s for 81920 events => throughput is 9.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,9 +124,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3821s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3744s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4282s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4193s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.61E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.2610s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1868s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0740s for 81920 events => throughput is 1.11E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6471s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5643s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0824s for 81920 events => throughput is 9.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207288) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.160346e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.967649e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.172468e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.004982e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3551s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3512s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0037s for 8192 events => throughput is 2.21E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4170s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4121s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.80E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.2267s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1909s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0356s for 81920 events => throughput is 2.30E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6254s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5798s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0452s for 81920 events => throughput is 1.81E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207288) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.477602e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.903119e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.535827e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.966510e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3494s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3470s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.72E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4171s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.84E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426120) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207294] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.2174s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1961s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 81920 events => throughput is 3.90E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.5995s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5705s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0287s for 81920 events => throughput is 2.85E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207294) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.300387e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.156830e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.269475e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4170s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4140s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.00E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.5963s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5689s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 81920 events => throughput is 3.03E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.272274e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.499613e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.518220e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4193s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4158s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6175s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5862s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0309s for 81920 events => throughput is 2.65E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.808476e+06 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.104718e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449452343426109] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.6578s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6457s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0066s 
+ [COUNTERS] PROGRAM TOTAL : 0.8589s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8551s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.55E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and hip (0.30449452343426120) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30449452343426120) and cuda (0.30449452343426109) differ by less than 3E-14 (3.3306690738754696e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,9 +559,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -415,57 +569,59 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.5016s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 81920 events => throughput is 9.93E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0064s + [COUNTERS] PROGRAM TOTAL : 2.0146s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0062s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 81920 events => throughput is 1.04E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and hip (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and cuda (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.960174e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.114496e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.342641e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.411767e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.313778e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.418677e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.335231e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.644247e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 
4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.321322e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.424249e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.755462e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.758689e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.202913e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.384808e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.640874e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.177573e+08 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index b057d3eb24..6af3b55835 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-18_23:42:57 +DATE: 2024-09-18_17:34:55 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.5186s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5125s - [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6834s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6749s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.63E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3303s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3242s - [COUNTERS] Fortran MEs ( 1 ) : 0.0060s for 8192 events => throughput is 1.36E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4230s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4146s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.72E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.2369s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1766s - [COUNTERS] Fortran MEs ( 1 ) : 0.0603s for 81920 events => throughput is 1.36E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6556s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5716s + [COUNTERS] Fortran MEs ( 1 ) : 0.0840s for 81920 events => throughput is 9.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446601800423] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446496609361] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3492s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3427s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.29E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4261s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4173s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.65E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446601800423) differ by less than 4E-4 (1.8856252759213987e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446496609361) differ by less than 4E-4 (1.9201714018812766e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305123565710] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747305007079218] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.2649s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2015s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0633s for 81920 events => throughput is 1.29E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.6758s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5913s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0842s for 81920 events => throughput is 9.73E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305123565710) differ by less than 4E-4 (1.8208556928911435e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305007079218) differ by less than 4E-4 (1.858740792393121e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.384737e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.013895e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.394396e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.014072e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446481959741] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446369440458] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3438s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3416s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.84E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4170s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4141s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.96E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446481959741) differ by less than 4E-4 (1.924982528933583e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446369440458) differ by less than 4E-4 (1.961935339744869e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305120129920] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747304961041555] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.2097s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1894s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0202s for 81920 events => throughput is 4.06E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.6056s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5782s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0272s for 81920 events => throughput is 3.01E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305120129920) differ by less than 4E-4 (1.8219731212631984e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747304961041555) differ by less than 4E-4 (1.8737136997515336e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.493283e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.210079e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.612914e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.272367e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446707997274] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3582s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3567s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.72E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4185s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4165s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.66E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446707997274) differ by less than 4E-4 (1.8507488352970114e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446614968528) differ by less than 4E-4 (1.881300697448296e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305200358782] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.2303s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2164s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0137s for 81920 events => throughput is 5.96E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.6070s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5879s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0188s for 81920 events => throughput is 4.35E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305200358782) differ by less than 4E-4 (1.7958801523665358e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ by less than 4E-4 (1.839838263961724e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.770097e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.874017e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.300612e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4142s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4122s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.54E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446614968528) differ by less than 4E-4 (1.881300697448296e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.5999s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5818s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0179s for 81920 events => throughput is 4.59E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ by less than 4E-4 (1.839838263961724e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.302533e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.178956e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.611044e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449447031649013] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4176s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4150s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.63E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449447031649013) differ by less than 4E-4 (1.744457354124762e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747305508949557] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6057s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5839s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 81920 events => throughput is 3.82E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305508949557) differ by less than 4E-4 (1.6955166515231213e-07) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.359914e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.606033e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446257236112] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449447352014630] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.6561s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6446s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0060s + [COUNTERS] PROGRAM TOTAL : 0.8553s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.8518s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.72E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and hip (0.30449446257236112) differ by less than 4E-4 (1.998784719958735e-07) +OK! xsec from fortran (0.30449452343426120) and cuda (0.30449447352014630) differ by less than 4E-4 (1.639245078566276e-07) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747304644712603] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747305761315818] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.5658s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5518s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 81920 events => throughput is 1.04E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s + [COUNTERS] PROGRAM TOTAL : 2.0187s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0105s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 81920 events => throughput is 1.06E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and hip (0.30747304644712603) differ by less than 4E-4 (1.9765939007765354e-07) +OK! xsec from fortran (0.30747310722207288) and cuda (0.30747305761315818) differ by less than 4E-4 (1.6134391445099538e-07) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.558902e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.132925e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.658289e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.463748e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.011473e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.452376e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.555927e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.074682e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 
4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.215146e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.432547e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.540892e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.996097e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.596411e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.096714e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.881194e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.649902e+08 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index bd5c094abb..d3c2ed78ae 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-18_23:43:15 +DATE: 2024-09-18_17:35:21 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.5137s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5079s - [COUNTERS] Fortran MEs ( 1 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6928s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6841s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.39E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3338s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3277s - [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4220s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4135s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.58E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.2345s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1750s - [COUNTERS] Fortran MEs ( 1 ) : 0.0595s for 81920 events => throughput is 1.38E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6593s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5746s + [COUNTERS] Fortran MEs ( 1 ) : 0.0847s for 81920 events => throughput is 9.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453160892020] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3582s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3505s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4261s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4172s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892020) differ by less than 2E-4 (2.6846653566892087e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892032) differ by less than 2E-4 (2.6846654010981297e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311535940242] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.2694s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1945s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0747s for 81920 events => throughput is 1.10E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6599s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5747s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0848s for 81920 events => throughput is 9.66E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940242) differ by less than 2E-4 (2.6465174718381945e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ by less than 2E-4 (2.6465174718381945e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.150780e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.812593e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.165133e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.833045e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453160892020] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3483s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3445s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.31E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4216s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4168s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.83E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892020) differ by less than 2E-4 (2.6846653566892087e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892032) differ by less than 2E-4 (2.6846654010981297e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311535940242] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.2237s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1890s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0345s for 81920 events => throughput is 2.37E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6345s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5889s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0453s for 81920 events => throughput is 1.81E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940242) differ by less than 2E-4 (2.6465174718381945e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ by less than 2E-4 (2.6465174718381945e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.685612e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.934567e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.739132e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.973267e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453251780906] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4226s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4204s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.90E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4198s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4167s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453251780906) differ by less than 2E-4 (2.98315638858071e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311628550072] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.2074s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1871s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 81920 events => throughput is 4.08E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6044s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5763s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 81920 events => throughput is 2.95E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311628550072) differ by less than 2E-4 (2.947714006218405e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.508719e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.181731e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.488610e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4215s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4184s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.97E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6145s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5872s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0269s for 81920 events => throughput is 3.04E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.215852e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.662207e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.572337e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4203s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4167s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.60E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6110s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5806s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0301s for 81920 events => throughput is 2.73E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.871798e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.069203e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452360186241] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449452360186230] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.6741s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6621s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.49E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0065s + [COUNTERS] PROGRAM TOTAL : 0.8557s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.8521s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and hip (0.30449452360186241) differ by less than 2E-4 (5.504243727472158e-10) +OK! xsec from fortran (0.30449452343426120) and cuda (0.30449452360186230) differ by less than 2E-4 (5.504239286580059e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310720557375] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310720557364] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.5096s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4940s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0086s for 81920 events => throughput is 9.48E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0070s + [COUNTERS] PROGRAM TOTAL : 2.0158s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0075s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 81920 events => throughput is 1.05E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and hip (0.30747310720557375) differ by less than 2E-4 (5.366040944920769e-11) +OK! xsec from fortran (0.30747310722207288) and cuda (0.30747310720557364) differ by less than 2E-4 (5.366074251611508e-11) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.570605e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.203370e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.619962e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.488049e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.249341e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.465715e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.345507e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.776763e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 
4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.257709e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.434433e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.785764e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.877036e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.388728e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.445366e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.652732e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.175182e+08 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index a40b232bb0..e14403d083 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-18_23:41:43 +DATE: 2024-09-18_17:33:04 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.8394s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8085s - [COUNTERS] Fortran MEs ( 1 ) : 0.0309s for 8192 events => throughput is 2.65E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8478s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8053s + [COUNTERS] Fortran MEs ( 1 ) : 0.0425s for 8192 events => throughput is 1.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3585s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3262s - [COUNTERS] Fortran MEs ( 1 ) : 0.0323s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4470s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4043s + [COUNTERS] Fortran MEs ( 1 ) : 0.0427s for 8192 events => throughput is 1.92E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=0 + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.4874s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1704s - [COUNTERS] Fortran MEs ( 1 ) : 0.3171s for 81920 events => throughput is 2.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9546s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5320s + [COUNTERS] Fortran MEs ( 1 ) : 0.4226s for 81920 events => throughput is 1.94E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641911695846964] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3519s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0356s for 8192 events => throughput is 2.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4114s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0447s for 8192 events => throughput is 1.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641911695846950) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846964) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473264592444664] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.5327s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1799s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3526s for 81920 events => throughput is 2.32E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.9966s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5483s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4479s for 81920 events => throughput is 1.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473264592444679) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444664) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.346198e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.854967e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.355296e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.848854e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846943] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3675s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3462s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4357s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4103s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 8192 events => throughput is 3.26E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641911695846943) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846957) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.3846s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1757s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2088s for 81920 events => throughput is 3.92E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8082s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5526s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2552s for 81920 events => throughput is 3.21E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473264592444679) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.976615e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.238674e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.995377e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.323356e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846943] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3623s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3498s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.63E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4251s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4091s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.22E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641911695846943) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,9 +319,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -329,36 +329,110 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.5417s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4046s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1369s for 81920 events => throughput is 5.99E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6875s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5321s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1550s for 81920 events => throughput is 5.28E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473264592444679) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.876235e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.205465e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.506247e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.341963e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4231s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4089s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.90E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.6853s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5431s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1419s for 81920 events => throughput is 5.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.704465e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.807181e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,9 +444,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! 
Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -380,20 +454,20 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6910s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6778s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.30E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0069s + [COUNTERS] PROGRAM TOTAL : 0.4336s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4101s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.55E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and hip (44.641911695846950) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,9 +479,89 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.7690s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5437s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2250s for 81920 events => throughput is 3.64E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.577527e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.629127e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.8492s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8454s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.63E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.641911695846957) and cuda (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -415,57 +569,59 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.4598s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4351s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0174s for 81920 events => throughput is 4.71E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0072s + [COUNTERS] PROGRAM TOTAL : 1.9952s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9855s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 81920 events => throughput is 9.15E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and hip (44.473264592444679) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.473264592444671) and cuda (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.361469e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.949285e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.393659e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.317105e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.804930e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.829932e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.565191e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.548750e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge 
*** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.834506e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.793745e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.303015e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.913836e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.811147e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.818636e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.075363e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.654381e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 9633ce81ed..a972218890 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' - +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-18_23:42:02 +DATE: 2024-09-18_17:33:33 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.6079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5786s - [COUNTERS] Fortran MEs ( 1 ) : 0.0293s for 8192 events => throughput is 2.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8346s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7928s + [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3537s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3240s - [COUNTERS] Fortran MEs ( 1 ) : 0.0298s for 8192 events => throughput is 2.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4474s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4056s + [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=0 + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.4902s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1703s - [COUNTERS] Fortran MEs ( 1 ) : 0.3199s for 81920 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9534s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5332s + [COUNTERS] Fortran MEs ( 1 ) : 0.4202s for 81920 events => throughput is 1.95E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641905397892330] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641906072918047] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3817s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3498s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 8192 events => throughput is 2.58E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4536s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4103s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0430s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641905397892330) differ by less than 4E-4 (1.4107717127842534e-07) +OK! xsec from fortran (44.641911695846957) and cpp (44.641906072918047) differ by less than 4E-4 (1.2595627507661078e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473258075185306] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473258789404959] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.5058s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1902s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3155s for 81920 events => throughput is 2.60E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.9508s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5325s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4180s for 81920 events => throughput is 1.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473258075185306) differ by less than 4E-4 (1.465433093761348e-07) +OK! xsec from fortran (44.473264592444671) and cpp (44.473258789404959) differ by less than 4E-4 (1.3048378089131063e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.618196e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.983462e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.656559e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.977072e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641902617887730] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641902189470080] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3766s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3616s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0148s for 8192 events => throughput is 5.53E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4272s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4098s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641902617887730) differ by less than 4E-4 (2.0335059314202653e-07) +OK! xsec from fortran (44.641911695846957) and cpp (44.641902189470080) differ by less than 4E-4 (2.1294735186305758e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473255619824656] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473255074265531] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.3344s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1473s for 81920 events => throughput is 5.56E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.7014s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5300s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1712s for 81920 events => throughput is 4.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473255619824656) differ by less than 4E-4 (2.0175312298587045e-07) +OK! xsec from fortran (44.473264592444671) and cpp (44.473255074265531) differ by less than 4E-4 (2.1402024852346102e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.888623e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.686204e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.714344e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.729824e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641902771385062] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3557s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3482s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4171s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 8192 events => throughput is 9.21E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641902771385062) differ by less than 4E-4 (1.9991218003223565e-07) +OK! xsec from fortran (44.641911695846957) and cpp (44.641902360436738) differ by less than 4E-4 (2.0911761755559866e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473255186065366] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.2562s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1834s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0726s for 81920 events => throughput is 1.13E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.6247s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5346s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0899s for 81920 events => throughput is 9.11E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473255186065366) differ by less than 4E-4 (2.1150638251921094e-07) +OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ by less than 4E-4 (2.240397288799656e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.181203e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.731978e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.141403e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4189s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4099s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 8192 events => throughput is 9.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.641911695846957) and cpp (44.641902360436738) differ by less than 4E-4 (2.0911761755559866e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.6246s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5387s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0857s for 81920 events => throughput is 9.56E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ by less than 4E-4 (2.240397288799656e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.618487e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.191155e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.810592e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641906399820272] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4241s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4113s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.53E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.641911695846957) and cpp (44.641906399820272) differ by less than 4E-4 (1.1863351012664225e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473258854390501] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.6706s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5504s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1199s for 81920 events => throughput is 6.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (44.473264592444671) and cpp (44.473258854390501) differ by less than 4E-4 (1.2902255375202287e-07) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.869027e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.899392e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641905467548966] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641910992291372] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6481s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6360s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 8192 events => throughput is 1.59E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0070s + [COUNTERS] PROGRAM TOTAL : 0.8527s + [COUNTERS] Fortran Overhead ( 0 ) 
: 0.8491s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and hip (44.641905467548966) differ by less than 4E-4 (1.3951682953372568e-07) +OK! xsec from fortran (44.641911695846957) and cuda (44.641910992291372) differ by less than 4E-4 (1.575997887748315e-08) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473257658055729] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473262664842089] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.5008s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4844s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 81920 events => throughput is 9.33E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0076s + [COUNTERS] PROGRAM TOTAL : 1.9894s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9808s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 81920 events => throughput is 1.03E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and hip (44.473257658055729) differ by less than 4E-4 (1.5592264279717938e-07) +OK! xsec from fortran (44.473264592444671) and cuda (44.473262664842089) differ by less than 4E-4 (4.334295222729878e-08) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.671756e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.991468e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.663884e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.344514e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.230603e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.881682e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.789505e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.350971e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge 
*** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.128565e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.826185e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.600395e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.350870e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.735657e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.507679e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.320347e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.018982e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 1f9016e379..f3cbf0c54f 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-18_23:42:20 +DATE: 2024-09-18_17:34:00 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.6327s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6008s - [COUNTERS] Fortran MEs ( 1 ) : 0.0318s for 8192 events => throughput is 2.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8326s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7908s + [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3618s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3299s - [COUNTERS] Fortran MEs ( 1 ) : 0.0319s for 8192 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4514s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4081s + [COUNTERS] Fortran MEs ( 1 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=0 + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7631s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4052s - [COUNTERS] Fortran MEs ( 1 ) : 0.3579s for 81920 events => throughput is 2.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9646s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5417s + [COUNTERS] Fortran MEs ( 1 ) : 0.4230s for 81920 events => throughput is 1.94E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912938404211] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3770s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3410s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0358s for 8192 events => throughput is 2.29E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4612s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4163s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0445s for 8192 events => throughput is 1.84E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641912938404211) differ by less than 2E-4 (2.783387209603916e-08) +OK! xsec from fortran (44.641911695846957) and cpp (44.641912938404218) differ by less than 2E-4 (2.783387209603916e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,9 +159,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -169,28 +169,28 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.5472s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1902s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3568s for 81920 events => throughput is 2.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.9868s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5362s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4502s for 81920 events => throughput is 1.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) +OK! xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.316075e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.844411e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.332670e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.856447e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912938404225] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3652s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3440s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 8192 events => throughput is 3.91E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4369s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4119s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 8192 events => throughput is 3.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641912938404225) differ by less than 2E-4 (2.7833872318083763e-08) +OK! xsec from fortran (44.641911695846957) and cpp (44.641912938404218) differ by less than 2E-4 (2.783387209603916e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473265850735238] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.3961s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1868s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2091s for 81920 events => throughput is 3.92E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7808s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5352s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2452s for 81920 events => throughput is 3.34E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473265850735238) differ by less than 2E-4 (2.8293190679207214e-08) +OK! xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.994557e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.286947e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.010766e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.353817e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912966309015] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3614s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3491s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0121s for 8192 events => throughput is 6.80E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4259s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4101s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641912966309015) differ by less than 2E-4 (2.8458952971988083e-08) +OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,120 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473265882025295] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.2345s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1223s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1120s for 81920 events => throughput is 7.32E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6932s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5410s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1518s for 81920 events => throughput is 5.39E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473265882025295) differ by less than 2E-4 (2.899676077028346e-08) +OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.182707e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.355089e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.929158e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.376897e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4277s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0141s for 8192 events => throughput is 5.80E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.6947s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5520s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1424s for 81920 events => throughput is 5.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.878311e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.966522e+05 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,9 +444,89 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4311s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4086s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + 
+OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.7576s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5385s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2187s for 81920 events => throughput is 3.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.691521e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.773666e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -380,20 +534,20 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911674225568] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6162s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6023s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0081s + [COUNTERS] PROGRAM TOTAL : 0.8481s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8441s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.54E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and hip (44.641911674225568) differ by less than 2E-4 (4.843292433776014e-10) +OK! xsec from fortran (44.641911695846957) and cuda (44.641911674225568) differ by less than 2E-4 (4.843293543999039e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,9 +559,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -415,57 +569,59 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264587763374] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.3998s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3749s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0173s for 81920 events => throughput is 4.73E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0077s + [COUNTERS] PROGRAM TOTAL : 1.9812s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9714s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 81920 events => throughput is 9.08E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and hip (44.473264587763374) differ by less than 2E-4 (1.0526113314313079e-10) +OK! 
xsec from fortran (44.473264592444671) and cuda (44.473264587763374) differ by less than 2E-4 (1.0526091109852587e-10) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.356779e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.958191e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.429203e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.401140e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.784466e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.815576e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP 
[clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.529124e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.499893e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.775937e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.820308e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.224497e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.845220e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.763929e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.813891e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.982810e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.729165e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 2360ada0f2..95eb3e309d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_17:11:40 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:08:39 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.191952e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.831109e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.948248e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.637744 sec -INFO: No Floating Point Exceptions have been reported - 1,376,729,510 cycles:u # 1.761 GHz (75.68%) - 2,773,495 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.25%) - 7,573,940 stalled-cycles-backend:u # 0.55% backend cycles idle (73.51%) - 2,354,068,706 instructions:u # 1.71 insn per cycle - # 0.00 stalled cycles per insn (73.83%) - 0.966210404 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.586175e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.543752e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.774580e+08 ) sec^-1 +MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.722428 sec +INFO: No Floating Point Exceptions have been reported + 2,618,484,542 cycles # 2.848 GHz + 4,056,431,697 instructions # 1.55 insn per cycle + 1.017935073 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.222095e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.399638e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.399638e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.980544 sec -INFO: No Floating Point Exceptions have been reported - 17,714,141,475 cycles:u # 2.954 GHz (74.92%) - 50,088,011 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.98%) - 300,941,690 stalled-cycles-backend:u # 1.70% backend cycles idle (75.04%) - 47,054,202,718 instructions:u # 2.66 insn per cycle - # 0.01 stalled cycles per insn (75.06%) - 6.069874876 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.036688e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.208211e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.208211e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.477862 sec +INFO: No Floating Point Exceptions have been reported + 19,055,513,200 cycles # 2.940 GHz + 46,088,548,361 instructions # 2.42 insn per cycle + 6.483409710 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.762520e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.209848e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.209848e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.411264 sec -INFO: No Floating Point Exceptions have been reported - 12,828,414,164 cycles:u # 2.908 GHz (75.03%) - 48,623,701 stalled-cycles-frontend:u # 0.38% frontend cycles idle (75.10%) - 491,523,625 stalled-cycles-backend:u # 3.83% backend cycles idle (75.04%) - 31,720,119,842 instructions:u # 2.47 insn per cycle - # 0.02 stalled cycles per insn (74.98%) - 4.527284897 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.577995e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.056243e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.056243e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.401175 sec +INFO: No Floating Point Exceptions have been reported + 12,945,161,675 cycles # 2.938 GHz + 31,621,534,754 instructions # 2.44 insn per cycle + 4.406822784 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.467999e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.317943e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.317943e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.375478 sec -INFO: No Floating Point Exceptions have been reported - 9,665,781,968 cycles:u # 2.853 GHz (74.99%) - 49,195,440 stalled-cycles-frontend:u # 0.51% frontend cycles idle (74.98%) - 913,032,881 stalled-cycles-backend:u # 9.45% backend cycles idle (74.98%) - 19,496,960,664 instructions:u # 2.02 insn per cycle - # 0.05 stalled cycles per insn (75.00%) - 3.436785449 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.979178e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.760192e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.760192e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.589167 sec +INFO: No Floating Point Exceptions have been reported + 10,070,726,803 cycles # 2.802 GHz + 19,587,544,877 instructions # 1.94 insn per cycle + 3.594697986 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.973756e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.755912e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.755912e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.601272 sec +INFO: No Floating Point Exceptions have been reported + 9,893,708,282 cycles # 2.744 GHz + 19,261,714,155 instructions # 1.95 insn per cycle + 3.606677205 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.684138e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.223088e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.223088e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.152604 sec +INFO: No Floating Point Exceptions have been reported + 8,635,892,874 cycles # 2.077 GHz + 15,755,316,929 instructions # 1.82 insn per cycle + 4.158382190 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 1a1cae9db4..7e1127db04 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,54 +1,77 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_18:05:38 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:51:56 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.831196e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.590388e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.590388e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.533079 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 18,064,474,817 cycles:u # 3.248 GHz (75.09%) - 111,016,051 stalled-cycles-frontend:u # 0.61% frontend cycles idle (75.13%) - 6,747,543,981 stalled-cycles-backend:u # 37.35% backend cycles idle (74.93%) - 16,621,660,063 instructions:u # 0.92 insn per cycle - # 0.41 stalled cycles per insn (74.82%) - 5.601394897 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.746451e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.921944e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.921944e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.223700 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 7,220,060,160 cycles # 2.915 GHz + 13,018,391,047 instructions # 1.80 insn per cycle + 2.533250665 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -56,36 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 
1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.347971e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.546387e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.546387e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.517064 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 18,168,819,111 cycles:u # 3.282 GHz (75.00%) - 46,724,707 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.01%) - 305,030,598 stalled-cycles-backend:u # 1.68% backend cycles idle (75.02%) - 47,372,612,878 instructions:u # 2.61 insn per cycle - # 0.01 stalled cycles per insn (75.02%) - 5.541617287 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.004941e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.165788e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.165788e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.898046 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 20,322,657,427 cycles # 2.944 GHz + 46,321,216,193 instructions # 2.28 insn per cycle + 6.904944789 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -93,36 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.898977e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.367969e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.367969e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.204327 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 13,584,919,540 cycles:u # 3.217 GHz (75.00%) - 50,454,529 stalled-cycles-frontend:u # 0.37% frontend cycles idle (75.01%) - 586,098,778 stalled-cycles-backend:u # 4.31% backend cycles idle (75.01%) - 32,631,269,808 instructions:u # 2.40 insn per cycle - # 0.02 stalled cycles per insn (75.00%) - 4.228292139 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.496333e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.925589e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.925589e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.836326 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 14,226,515,518 cycles # 2.937 GHz + 32,466,683,813 instructions # 2.28 insn per cycle + 4.843971134 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -130,36 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.580001e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.427352e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.427352e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.333102 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,363,050,041 cycles:u # 3.091 GHz (74.98%) - 51,426,741 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.97%) - 966,849,777 stalled-cycles-backend:u # 9.33% backend cycles idle (74.95%) - 20,362,478,699 instructions:u # 1.96 insn per cycle - # 0.05 stalled cycles per insn (74.94%) - 3.357219989 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.825666e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.487837e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.487837e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.083516 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 11,316,310,914 cycles # 2.767 GHz + 20,951,601,246 instructions # 1.85 insn per cycle + 4.090897830 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -167,16 +183,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.895357e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.603837e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.603837e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.954670 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 11,188,953,637 cycles # 2.824 GHz + 20,622,311,623 instructions # 1.84 insn per cycle + 3.962452110 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.623904e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.111036e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.111036e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.511201 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 9,933,844,941 cycles # 2.199 GHz + 16,904,875,780 instructions # 1.70 insn per cycle + 4.518707685 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 7a06736985..d91c4828d9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_18:11:05 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:04:11 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.197443e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.883160e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.001388e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.531377e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.591267e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.748328e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.752319 sec -INFO: No Floating Point Exceptions have 
been reported - 15,508,972,207 cycles:u # 3.247 GHz (74.90%) - 53,853,359 stalled-cycles-frontend:u # 0.35% frontend cycles idle (74.91%) - 6,687,172,993 stalled-cycles-backend:u # 43.12% backend cycles idle (75.04%) - 11,593,559,431 instructions:u # 0.75 insn per cycle - # 0.58 stalled cycles per insn (75.01%) - 4.807135237 seconds time elapsed +TOTAL : 1.353084 sec +INFO: No Floating Point Exceptions have been reported + 4,633,251,875 cycles # 2.904 GHz + 7,212,974,866 instructions # 1.56 insn per cycle + 1.652016166 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.365933e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.568505e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.568505e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.029394e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.199449e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.199449e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.347034 sec -INFO: No Floating Point Exceptions have been reported - 17,796,682,629 cycles:u # 3.322 GHz (74.93%) - 50,746,150 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.94%) - 238,242,947 stalled-cycles-backend:u # 1.34% backend cycles idle (75.01%) - 47,050,275,770 instructions:u # 2.64 insn per cycle - # 0.01 stalled cycles per insn (75.06%) - 5.359517604 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.897530 sec +INFO: No Floating Point Exceptions have been reported + 20,162,123,319 cycles # 2.922 GHz + 46,195,009,239 instructions # 2.29 insn per cycle + 6.903032860 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.013710e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.510635e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.510635e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.570184e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.049072e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.049072e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.861663 sec -INFO: No Floating Point Exceptions have been reported - 12,667,828,509 cycles:u # 3.272 GHz (75.00%) - 51,640,119 stalled-cycles-frontend:u # 0.41% frontend cycles idle (75.00%) - 477,608,047 stalled-cycles-backend:u # 3.77% backend cycles idle (75.00%) - 31,726,703,174 instructions:u # 2.50 insn per cycle - # 0.02 stalled cycles per insn (75.00%) - 3.874086240 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.798502 sec +INFO: No Floating Point Exceptions have been reported + 14,063,092,419 cycles # 2.928 GHz + 31,626,728,543 instructions # 2.25 insn per cycle + 4.804471582 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.716466e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.642060e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.642060e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.970535e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.757812e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.757812e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.052112 sec -INFO: No Floating Point Exceptions have been reported - 9,828,986,051 cycles:u # 3.210 GHz (74.92%) - 51,915,511 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.81%) - 959,107,572 stalled-cycles-backend:u # 9.76% backend cycles idle (74.81%) - 19,515,562,689 instructions:u # 1.99 insn per cycle - # 0.05 stalled cycles per insn (75.04%) - 3.064343742 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) +TOTAL : 3.978190 sec +INFO: No Floating Point Exceptions have been reported + 11,201,119,802 cycles # 2.813 GHz + 19,490,103,913 instructions # 1.74 insn per cycle + 3.984105389 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = 
VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.023963e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.847631e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.847631e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.897854 sec +INFO: No Floating Point Exceptions have been reported + 11,011,148,409 cycles # 2.821 GHz + 18,950,488,449 instructions # 1.72 insn per cycle + 3.903822013 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.727909e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.291192e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.291192e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.434907 sec +INFO: No Floating Point Exceptions have been reported + 9,769,161,551 cycles # 2.200 GHz + 15,456,644,765 instructions # 1.58 insn per cycle + 4.440874371 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 832280b89b..95f355ef67 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,50 +1,70 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_18:09:14 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:58:35 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.802313e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.853145e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.971655e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.537151 sec -INFO: No Floating Point Exceptions have been reported - 17,595,795,010 cycles:u # 3.161 GHz (74.88%) - 112,097,181 stalled-cycles-frontend:u # 0.64% frontend cycles idle (74.80%) - 6,681,120,017 stalled-cycles-backend:u # 37.97% backend cycles idle (75.03%) - 16,149,336,211 instructions:u # 0.92 insn per cycle - # 0.41 stalled cycles per insn (75.14%) - 5.661018651 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.092066e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.598729e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.734259e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.884241 sec +INFO: No Floating Point Exceptions have been reported + 6,167,226,842 cycles # 2.911 GHz + 11,436,463,316 instructions # 1.85 insn per cycle + 2.174841291 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -52,34 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.319894e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.513702e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.513702e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.527009 sec -INFO: No Floating Point Exceptions have been reported - 17,866,125,388 cycles:u # 3.227 GHz (75.01%) - 50,618,831 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.01%) - 308,495,653 stalled-cycles-backend:u # 1.73% backend cycles idle (75.01%) - 47,204,169,830 instructions:u # 2.64 insn per cycle - # 0.01 stalled cycles per insn (75.01%) - 5.539238754 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.035778e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.207383e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.207383e+06 
) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.487731 sec +INFO: No Floating Point Exceptions have been reported + 19,058,569,596 cycles # 2.936 GHz + 46,087,741,277 instructions # 2.42 insn per cycle + 6.493592711 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -87,34 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.021747e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.523645e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.523645e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.851218 sec -INFO: No Floating Point Exceptions have been reported - 12,650,891,754 cycles:u # 3.276 GHz (74.93%) - 51,204,488 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.94%) - 478,826,954 stalled-cycles-backend:u # 3.78% backend cycles idle (74.95%) - 31,826,900,534 instructions:u # 2.52 insn per cycle - # 0.02 stalled cycles per insn (74.94%) - 3.863373441 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.566016e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.044387e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.044387e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.436390 sec +INFO: No Floating Point Exceptions have been reported + 12,971,922,098 cycles # 2.921 GHz + 
31,622,790,809 instructions # 2.44 insn per cycle + 4.442502369 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -122,34 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.777286e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.747102e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.747102e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.002515 sec -INFO: No Floating Point Exceptions have been reported - 9,691,495,078 cycles:u # 3.217 GHz (74.98%) - 49,192,845 stalled-cycles-frontend:u # 0.51% frontend cycles idle (75.04%) - 907,793,158 stalled-cycles-backend:u # 9.37% backend cycles idle (75.04%) - 19,510,741,214 instructions:u # 2.01 insn per cycle - # 0.05 stalled cycles per insn (75.04%) - 3.014576983 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.978030e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.768932e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.768932e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.592451 sec +INFO: No Floating Point Exceptions have been reported + 10,115,574,971 cycles # 2.812 GHz + 19,587,420,856 instructions # 1.94 insn per cycle + 3.598300355 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -157,16 +170,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not 
supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.014830e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.827477e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.827477e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.531675 sec +INFO: No Floating Point Exceptions have been reported + 9,897,196,547 cycles # 2.799 GHz + 19,249,419,683 instructions # 1.94 insn per cycle + 3.537559003 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.720646e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.279247e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.279247e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.067635 sec +INFO: No Floating Point Exceptions have been reported + 8,664,270,263 cycles # 2.127 GHz + 15,755,691,110 
instructions # 1.82 insn per cycle + 4.073643316 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 216a9f2843..e73a9b015a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_17:12:00 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:09:10 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.463295e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.380629e+07 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.522784e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.561438 sec -INFO: No Floating Point Exceptions have been reported - 1,323,775,686 cycles:u # 2.265 GHz (75.31%) - 2,550,426 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.27%) - 8,480,241 stalled-cycles-backend:u # 0.64% backend cycles idle (74.97%) - 2,375,555,887 instructions:u # 1.79 insn per cycle - # 0.00 stalled cycles per insn (72.80%) - 0.707689347 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.079594e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.670378e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.825463e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.680250 sec +INFO: No Floating Point Exceptions have been reported + 2,578,534,884 cycles # 2.821 GHz + 4,030,538,684 instructions # 1.56 insn per cycle + 0.973967444 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165216E-002 -Relative difference = 1.0277079305077159e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.218296e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.395918e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.395918e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.995484 sec -INFO: No Floating Point Exceptions have been reported - 17,735,182,931 cycles:u # 2.952 GHz (74.98%) - 49,633,694 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.98%) - 806,527,546 stalled-cycles-backend:u # 4.55% backend cycles idle (74.98%) - 46,692,259,990 instructions:u # 2.63 insn per cycle - # 0.02 stalled cycles per insn (74.98%) - 6.083852725 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 489) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.022698e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.191211e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.191211e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.567027 sec +INFO: No Floating Point Exceptions have been reported + 19,075,762,627 cycles # 2.903 GHz + 46,055,106,551 instructions # 2.41 insn per cycle + 6.572547698 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.807187e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.258576e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.258576e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.310768 sec -INFO: No Floating Point Exceptions have been reported - 12,550,862,557 cycles:u # 2.903 GHz (75.02%) - 50,726,873 stalled-cycles-frontend:u # 0.40% frontend cycles idle (75.02%) - 291,932,402 stalled-cycles-backend:u # 2.33% backend cycles idle (75.02%) - 31,508,960,309 instructions:u # 2.51 insn per cycle - # 0.01 stalled cycles per insn (75.02%) - 4.414876736 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1605) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.585711e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.070341e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.070341e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.383967 sec +INFO: No Floating Point Exceptions have been reported + 12,890,625,740 cycles # 2.937 GHz + 31,557,909,117 instructions # 2.45 insn per cycle + 4.389588631 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1648) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.592638e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.453585e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.453585e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.200516 sec -INFO: No Floating Point Exceptions have been reported - 9,890,103,583 cycles:u # 3.075 GHz (75.00%) - 47,965,827 stalled-cycles-frontend:u # 0.48% frontend cycles idle (74.90%) - 314,081,164 stalled-cycles-backend:u # 3.18% backend cycles idle (74.89%) - 19,346,133,777 instructions:u # 1.96 insn per cycle - # 0.02 stalled cycles per insn (74.95%) - 3.344858347 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1860) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.969969e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.755961e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.755961e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.604328 sec +INFO: No Floating Point Exceptions have been reported + 10,100,174,359 cycles # 2.799 GHz + 19,576,296,506 instructions # 1.94 insn per cycle + 3.609879791 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1894) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.022206e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.841390e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.841390e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.520245 sec +INFO: No Floating Point Exceptions have been reported + 9,894,539,917 cycles # 2.807 GHz + 19,271,397,768 instructions # 1.95 insn per cycle + 3.525910639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1636) (512y: 178) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.762660e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.347769e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.347769e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.982657 sec +INFO: No Floating Point Exceptions have been reported + 8,470,289,841 cycles # 2.124 GHz + 15,587,855,124 instructions # 1.84 insn per cycle + 3.988212621 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 833) (512y: 153) (512z: 1240) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index f97660e788..8184b4eff2 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_17:55:59 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:42:17 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.212705e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.866236e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.984628e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.535700 sec -INFO: No Floating Point Exceptions have been reported - 1,422,688,283 cycles:u # 2.563 GHz (74.91%) - 2,445,274 stalled-cycles-frontend:u # 0.17% frontend cycles idle (76.26%) - 6,030,553 stalled-cycles-backend:u # 0.42% backend cycles idle (76.20%) - 2,382,760,221 instructions:u # 1.67 insn per cycle - # 0.00 stalled cycles per insn (74.79%) - 0.600574954 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.357145e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.547980e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.727026e+08 ) sec^-1 +MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.689616 sec +INFO: No Floating Point Exceptions have been reported + 2,681,392,745 cycles # 2.885 GHz + 4,097,806,151 instructions # 1.53 insn per cycle + 0.986657014 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.911736e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.338775e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.338775e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.044684 sec -INFO: No Floating Point Exceptions have been reported - 13,050,068,497 cycles:u # 3.218 GHz (74.97%) - 50,107,237 stalled-cycles-frontend:u # 0.38% frontend cycles idle (74.95%) - 115,123,066 stalled-cycles-backend:u # 0.88% backend cycles idle (74.97%) - 36,903,248,617 instructions:u # 2.83 insn per cycle - # 0.00 stalled cycles per insn (74.97%) - 4.060765563 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 679) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.608983e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.060555e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.060555e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.321503 sec +INFO: No Floating Point Exceptions have been reported + 12,686,452,587 cycles # 2.933 GHz + 32,573,246,433 instructions # 2.57 insn per cycle + 4.326967751 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 281) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.605004e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.533754e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.533754e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.155865 sec -INFO: No Floating Point Exceptions have been reported - 10,148,435,458 cycles:u # 3.205 GHz (75.01%) - 46,634,347 stalled-cycles-frontend:u # 0.46% frontend cycles idle (74.99%) - 104,294,136 stalled-cycles-backend:u # 1.03% backend cycles idle (74.99%) - 24,440,653,533 instructions:u # 2.41 insn per cycle - # 0.00 stalled cycles per insn (74.99%) - 3.170871547 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2326) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.001283e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.839506e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.839506e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.560441 sec +INFO: No Floating Point Exceptions have been reported + 10,462,099,873 cycles # 2.934 GHz + 24,899,188,532 instructions # 2.38 insn per cycle + 3.566316228 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1246) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.188423e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.546970e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.546970e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.710085 sec -INFO: No Floating Point Exceptions have been reported - 8,663,850,993 cycles:u # 3.185 GHz (75.01%) - 51,683,213 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.01%) - 70,391,017 stalled-cycles-backend:u # 0.81% backend cycles idle (75.01%) - 16,864,309,931 instructions:u # 1.95 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 2.725041597 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2981) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.199006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.213700e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.213700e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.271279 sec +INFO: No Floating Point Exceptions have been reported + 9,171,998,387 cycles # 2.800 GHz + 16,835,147,245 instructions # 1.84 insn per cycle + 3.276861848 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1599) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.270242e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.359980e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.359980e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.184078 sec +INFO: No Floating Point Exceptions have been reported + 8,899,793,398 cycles # 2.791 GHz + 16,396,706,280 instructions # 1.84 insn per cycle + 3.189617083 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1355) (512y: 139) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.962735e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.715557e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.715557e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.614098 sec +INFO: No Floating Point Exceptions have been reported + 7,891,427,724 cycles # 2.181 GHz + 14,556,226,424 instructions # 1.84 insn per cycle + 3.619718707 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1003) (512y: 158) (512z: 946) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 0634ce5a2b..a7c1b0753b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_17:56:13 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:42:43 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.477604e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.411703e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.565078e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.514505 sec -INFO: No Floating Point Exceptions have been reported - 1,395,228,003 cycles:u # 2.612 GHz (74.64%) - 2,518,551 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.17%) - 5,517,893 stalled-cycles-backend:u # 0.40% backend cycles idle (75.33%) - 2,173,715,399 instructions:u # 1.56 insn per cycle - # 0.00 stalled cycles per insn (74.79%) - 0.576603498 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.653794e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.579157e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.778336e+08 ) sec^-1 +MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.686013 sec +INFO: No Floating Point Exceptions have been reported + 2,680,206,326 cycles # 2.872 GHz + 4,167,068,379 instructions # 1.55 insn per cycle + 0.992521934 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165216E-002 -Relative difference = 1.0277079305077159e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.663928e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.540166e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.540166e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.099990 sec -INFO: No Floating Point Exceptions have been reported - 10,034,826,717 cycles:u # 3.226 GHz (74.92%) - 49,456,007 stalled-cycles-frontend:u # 0.49% frontend cycles idle (75.05%) - 47,625,467 stalled-cycles-backend:u # 0.47% backend cycles idle (75.05%) - 28,198,545,003 instructions:u # 2.81 insn per cycle - # 0.00 stalled cycles per insn (75.05%) - 3.115118375 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 609) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.093810e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.934858e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.934858e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.416949 sec +INFO: No Floating Point Exceptions have been reported + 10,012,195,167 cycles # 2.926 GHz + 25,507,793,848 instructions # 2.55 insn per cycle + 3.422575217 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 236) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.919225e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.110858e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.110858e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.892504 sec -INFO: No Floating Point Exceptions have been reported - 9,258,923,431 cycles:u # 3.190 GHz (74.95%) - 49,114,037 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.92%) - 57,459,537 stalled-cycles-backend:u # 0.62% backend cycles idle (74.94%) - 21,316,906,499 instructions:u # 2.30 insn per cycle - # 0.00 stalled cycles per insn (74.94%) - 2.907430612 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2070) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.342172e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.581913e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.581913e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.102371 sec +INFO: No Floating Point Exceptions have been reported + 9,123,975,305 cycles # 2.936 GHz + 21,542,843,128 instructions # 2.36 insn per cycle + 3.108003766 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1112) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.415515e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.012681e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.012681e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.582235 sec -INFO: No Floating Point Exceptions have been reported - 8,197,347,538 cycles:u # 3.162 GHz (75.01%) - 49,538,709 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.01%) - 67,424,791 stalled-cycles-backend:u # 0.82% backend cycles idle (75.01%) - 15,821,676,578 instructions:u # 1.93 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 2.597459574 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2739) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.389028e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.617798e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.617798e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.042023 sec +INFO: No Floating Point Exceptions have been reported + 8,587,076,543 cycles # 2.818 GHz + 15,956,957,926 instructions # 1.86 insn per cycle + 3.047668407 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1497) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165086E-002 -Relative difference = 1.0277089447254817e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.421436e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.692453e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.692453e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.006782 sec +INFO: No Floating Point Exceptions have been reported + 8,445,737,284 cycles # 2.805 GHz + 15,563,019,384 instructions # 1.84 insn per cycle + 3.012659502 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1264) (512y: 141) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.061400e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.904070e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.904070e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.459956 sec +INFO: No Floating Point Exceptions have been reported + 7,611,248,188 cycles # 2.197 GHz + 14,286,576,836 instructions # 1.88 insn per cycle + 3.465475679 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1031) (512y: 164) (512z: 876) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 3821cdc626..db1ecc021d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_17:12:18 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:09:40 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.403119e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.105240e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.280053e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 -TOTAL : 0.424406 sec -INFO: No Floating Point Exceptions have been reported - 999,360,396 cycles:u # 2.358 GHz (74.54%) - 2,562,121 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.49%) - 10,519,637 stalled-cycles-backend:u # 1.05% backend cycles idle (75.27%) - 1,936,418,482 instructions:u # 1.94 insn per cycle - # 0.01 stalled cycles per insn (73.93%) - 0.550736823 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.236538e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.678017e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
8.558515e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.582271 sec +INFO: No Floating Point Exceptions have been reported + 2,326,541,752 cycles # 2.874 GHz + 3,619,452,327 instructions # 1.56 insn per cycle + 0.866579999 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.590934e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.865276e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.865276e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.635572 sec -INFO: No Floating Point Exceptions have been reported - 15,421,260,631 cycles:u # 3.321 GHz (75.02%) - 40,826,297 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.02%) - 496,980,116 stalled-cycles-backend:u # 3.22% backend cycles idle (75.02%) - 46,999,382,944 instructions:u # 3.05 insn per cycle - # 0.01 stalled cycles per insn (75.02%) - 4.715017705 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.078772e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.274321e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274321e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.204384 sec +INFO: No Floating Point Exceptions have been reported + 18,271,266,571 cycles # 2.943 GHz + 45,007,026,058 instructions # 2.46 insn per cycle + 6.209806202 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.115905e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.429338e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.429338e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.710652 sec -INFO: No Floating Point Exceptions have been reported - 8,665,630,615 cycles:u # 3.188 GHz (74.95%) - 39,210,982 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.99%) - 1,214,273,177 stalled-cycles-backend:u # 14.01% backend cycles idle (74.99%) - 22,397,892,524 instructions:u # 2.58 insn per cycle - # 0.05 stalled cycles per insn (74.99%) - 2.781731901 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.258213e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.443370e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.443370e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.161459 sec +INFO: No Floating Point Exceptions have been reported + 9,301,142,039 cycles # 2.938 GHz + 22,273,650,036 instructions # 2.39 insn per cycle + 3.166937253 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.329043e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.854609e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.854609e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.609169 sec -INFO: No Floating Point Exceptions have been reported - 7,939,202,306 cycles:u # 3.041 GHz (75.05%) - 41,060,608 stalled-cycles-frontend:u # 0.52% frontend cycles idle (74.94%) - 1,746,192,525 stalled-cycles-backend:u # 21.99% backend cycles idle (74.89%) - 15,491,305,602 instructions:u # 1.95 insn per cycle - # 0.11 stalled cycles per insn (74.91%) - 2.721507178 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.422291e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.701347e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.701347e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.969313 sec +INFO: No Floating Point Exceptions have been reported + 8,389,284,998 cycles # 2.822 GHz + 15,752,357,337 instructions # 1.88 insn per cycle + 2.974718872 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.405471e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.684326e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.684326e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.990876 sec +INFO: No Floating Point Exceptions have been reported + 8,285,038,888 cycles # 2.766 GHz + 15,588,340,357 instructions # 1.88 insn per cycle + 2.996605246 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.444926e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.740937e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.740937e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.948513 sec +INFO: No Floating Point Exceptions have been reported + 6,657,028,546 cycles # 2.254 GHz + 12,863,339,645 instructions # 1.93 insn per cycle + 2.954217512 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index c6804ffdb2..47dd15a77b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,54 +1,77 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_18:06:00 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:52:30 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.949156e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.073898e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.073898e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 -TOTAL : 5.490726 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 17,549,834,909 cycles:u # 3.183 GHz (74.85%) - 213,060,941 stalled-cycles-frontend:u # 1.21% frontend cycles idle (74.99%) - 6,636,265,921 stalled-cycles-backend:u # 37.81% backend cycles idle (75.12%) - 16,546,926,833 instructions:u # 0.94 insn per cycle - # 0.40 stalled cycles per insn (75.11%) - 5.556605697 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.148525e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.888705e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.888705e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.710491 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,650,857,001 cycles # 2.904 GHz + 10,226,411,017 instructions # 1.81 insn per cycle + 2.002623091 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -56,36 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 
1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.598329e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.877545e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.877545e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.677709 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 15,505,411,543 cycles:u # 3.307 GHz (74.87%) - 36,424,638 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.85%) - 461,953,180 stalled-cycles-backend:u # 2.98% backend cycles idle (74.95%) - 47,366,132,559 instructions:u # 3.05 insn per cycle - # 0.01 stalled cycles per insn (75.04%) - 4.694974250 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.051342e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.236619e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.236619e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.467611 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 18,975,945,343 cycles # 2.932 GHz + 45,166,614,913 instructions # 2.38 insn per cycle + 6.474019296 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -93,36 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.042259e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.284113e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.284113e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.829756 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 9,055,489,718 cycles:u # 3.187 GHz (74.95%) - 38,127,414 stalled-cycles-frontend:u # 0.42% frontend cycles idle (74.95%) - 1,273,596,323 stalled-cycles-backend:u # 14.06% backend cycles idle (74.97%) - 23,471,932,770 instructions:u # 2.59 insn per cycle - # 0.05 stalled cycles per insn (74.97%) - 2.846916011 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.143329e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.199468e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.199468e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.443303 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,057,348,114 cycles # 2.916 GHz + 23,610,490,289 instructions # 2.35 insn per cycle + 3.450411330 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -130,36 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.400094e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.900382e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.900382e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.615660 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 8,346,715,316 cycles:u # 3.177 GHz (74.93%) - 41,455,442 stalled-cycles-frontend:u # 0.50% frontend cycles idle (75.03%) - 1,754,761,500 stalled-cycles-backend:u # 21.02% backend cycles idle (75.03%) - 16,486,470,163 instructions:u # 1.98 insn per cycle - # 0.11 stalled cycles per insn (75.03%) - 2.632632852 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.288972e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.428534e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.428534e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.255640 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 9,181,255,557 cycles # 2.815 GHz + 16,874,424,213 instructions # 1.84 insn per cycle + 3.262739708 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -167,16 +183,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.308266e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.504995e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.504995e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.233704 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 9,120,657,024 cycles # 2.815 GHz + 16,716,849,319 instructions # 1.83 insn per cycle + 3.240866405 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.329690e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.465437e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.465437e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 3.207876 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 7,429,892,192 cycles # 2.312 GHz + 14,072,572,968 instructions # 1.89 insn per cycle + 3.215041865 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index b37392835e..aa8d2ebaf9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_18:11:26 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:04:44 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.405910e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.163696e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.346223e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371906e-02 +- 3.274477e-06 ) GeV^0 -TOTAL : 4.597944 sec -INFO: No Floating Point Exceptions have been reported - 15,023,376,445 cycles:u # 3.253 GHz (74.95%) - 154,433,581 stalled-cycles-frontend:u # 1.03% frontend cycles idle (74.89%) - 6,721,315,352 
stalled-cycles-backend:u # 44.74% backend cycles idle (74.92%) - 11,325,642,994 instructions:u # 0.75 insn per cycle - # 0.59 stalled cycles per insn (75.05%) - 4.651851629 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.285600e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.265115e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.156209e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 +TOTAL : 1.210437 sec +INFO: No Floating Point Exceptions have been reported + 4,156,256,455 cycles # 2.890 GHz + 6,567,216,103 instructions # 1.58 insn per cycle + 1.494886653 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.606207e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.886415e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.886415e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.068119e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.263869e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.263869e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.598258 sec -INFO: No Floating Point Exceptions have been reported - 15,285,050,760 cycles:u # 3.319 GHz (74.99%) - 40,424,777 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.98%) - 450,797,994 stalled-cycles-backend:u # 2.95% backend cycles idle (74.98%) - 47,165,248,582 instructions:u # 3.09 insn per cycle - # 0.01 stalled cycles per insn (74.98%) - 4.606838696 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.607111 sec +INFO: No Floating Point Exceptions have been reported + 19,321,982,901 cycles # 2.923 GHz + 45,195,162,918 instructions # 2.34 insn per cycle + 6.612467743 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = 
FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.159954e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.504617e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.504617e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.243612e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.428241e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.428241e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.677324 sec -INFO: No Floating Point Exceptions have been reported - 8,616,252,132 cycles:u # 3.211 GHz (74.97%) - 39,194,881 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.96%) - 1,204,691,262 stalled-cycles-backend:u # 13.98% backend cycles idle (74.96%) - 22,427,431,536 instructions:u # 2.60 insn per cycle - # 0.05 stalled cycles per insn (74.96%) - 2.685817270 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.520615 sec +INFO: No Floating Point Exceptions have been reported + 10,297,430,100 cycles # 2.921 GHz + 22,355,563,747 instructions # 2.17 insn per cycle + 3.526233568 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = 
FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.488178e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.101916e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.101916e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.394598e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.672540e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.672540e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.487888 sec -INFO: No Floating Point Exceptions have been reported - 7,940,271,110 cycles:u # 3.184 GHz (74.99%) - 40,701,330 stalled-cycles-frontend:u # 0.51% frontend cycles idle (74.98%) - 1,735,184,462 stalled-cycles-backend:u # 21.85% backend cycles idle (74.98%) - 15,494,245,719 instructions:u # 1.95 insn per cycle - # 0.11 stalled cycles per insn (74.98%) - 2.496752359 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) +TOTAL : 3.341420 sec +INFO: No Floating Point Exceptions have been reported + 9,418,657,206 cycles # 2.815 GHz + 15,664,231,235 instructions # 1.66 insn per cycle + 3.347085737 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = 
VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.438466e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.794511e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.794511e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 3.303984 sec +INFO: No Floating Point Exceptions have been reported + 9,386,171,386 cycles # 2.837 GHz + 15,303,933,132 instructions # 1.63 insn per cycle + 3.309654062 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.452484e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.752423e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.752423e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 3.289646 sec +INFO: No Floating Point Exceptions have been reported + 7,666,750,686 cycles # 2.328 GHz + 12,574,987,911 instructions # 1.64 insn per cycle + 3.295237837 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index 7010435267..981ff690e7 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,50 +1,70 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_18:09:36 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:59:07 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.040495e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.142807e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.322695e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 -TOTAL : 5.227617 sec -INFO: No Floating Point Exceptions have been reported - 17,194,136,336 cycles:u # 3.284 GHz (74.97%) - 204,361,213 stalled-cycles-frontend:u # 1.19% frontend cycles idle (75.08%) - 5,523,861,891 stalled-cycles-backend:u # 32.13% backend cycles idle (75.01%) - 16,198,271,603 instructions:u # 0.94 insn per cycle - # 0.34 stalled cycles per insn (74.96%) - 5.279378863 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.867533e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.208256e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.038855e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.512988 sec +INFO: No Floating Point Exceptions have been reported + 5,035,172,508 cycles # 2.896 GHz + 9,178,648,119 instructions # 1.82 insn per cycle + 1.796445964 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -52,34 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.616489e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.898615e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.898615e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.569196 sec -INFO: No Floating Point Exceptions have been reported - 15,252,237,370 cycles:u # 3.333 GHz (75.00%) - 39,887,321 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.00%) - 465,360,550 stalled-cycles-backend:u # 3.05% backend cycles idle (75.00%) - 47,141,106,292 instructions:u # 3.09 insn per cycle - # 0.01 stalled cycles per insn (75.00%) - 4.578028940 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.073817e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.267606e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.267606e+06 ) 
sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.232302 sec +INFO: No Floating Point Exceptions have been reported + 18,275,461,834 cycles # 2.931 GHz + 45,008,664,367 instructions # 2.46 insn per cycle + 6.237799317 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -87,34 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative 
difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.169163e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.514055e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.514055e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.673673 sec -INFO: No Floating Point Exceptions have been reported - 8,598,441,727 cycles:u # 3.208 GHz (74.93%) - 38,900,163 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.93%) - 1,168,143,641 stalled-cycles-backend:u # 13.59% backend cycles idle (74.93%) - 22,476,523,422 instructions:u # 2.61 insn per cycle - # 0.05 stalled cycles per insn (74.95%) - 2.682115448 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.243088e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.444450e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.444450e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.181850 sec +INFO: No Floating Point Exceptions 
have been reported + 9,350,023,781 cycles # 2.934 GHz + 22,274,333,552 instructions # 2.38 insn per cycle + 3.187507510 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -122,34 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.442974e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.071928e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.071928e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.522489 sec -INFO: No Floating Point Exceptions have been reported - 8,055,832,723 cycles:u # 3.186 GHz (75.01%) - 40,025,265 stalled-cycles-frontend:u # 0.50% frontend cycles idle (75.01%) - 1,745,703,214 stalled-cycles-backend:u # 21.67% backend cycles idle (75.01%) - 15,523,601,959 instructions:u # 1.93 insn per cycle - # 0.11 stalled cycles per insn (75.01%) - 2.531311690 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.392219e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.668104e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.668104e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.999184 sec +INFO: No Floating Point Exceptions have been reported + 8,440,748,249 cycles # 2.810 GHz + 15,754,020,269 
instructions # 1.87 insn per cycle + 3.004841956 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -157,16 +170,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.422328e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.772097e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.772097e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.968153 sec +INFO: No Floating Point Exceptions have been reported + 8,367,700,869 cycles # 2.815 GHz + 15,588,459,242 instructions # 1.86 insn per cycle + 2.973858535 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.440851e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.737976e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.737976e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.952558 sec +INFO: No Floating Point Exceptions have been reported + 6,664,861,082 cycles # 2.254 GHz + 12,863,872,119 
instructions # 1.93 insn per cycle + 2.958126027 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index f2f03e5b07..5f8c460514 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_17:12:33 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:10:07 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.504883e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.545624e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.777206e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 -TOTAL : 0.411096 sec -INFO: No Floating Point 
Exceptions have been reported - 1,041,962,156 cycles:u # 2.427 GHz (75.90%) - 2,518,113 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.47%) - 5,083,447 stalled-cycles-backend:u # 0.49% backend cycles idle (74.88%) - 1,816,724,710 instructions:u # 1.74 insn per cycle - # 0.00 stalled cycles per insn (74.60%) - 0.532974387 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.297995e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.821835e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.125593e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.584822 sec +INFO: No Floating Point Exceptions have been reported + 2,340,511,556 cycles # 2.876 GHz + 3,573,310,904 instructions # 1.53 insn per cycle + 0.872056454 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.529116e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.798032e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.798032e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.841556 sec -INFO: No Floating Point Exceptions have been reported - 15,106,076,884 cycles:u # 3.115 GHz (74.97%) - 38,349,519 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.86%) - 708,992,799 stalled-cycles-backend:u # 4.69% backend cycles idle (74.90%) - 46,302,874,988 instructions:u # 3.07 insn per cycle - # 0.02 stalled cycles per insn (75.05%) - 4.891200103 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.074691e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.268219e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.268219e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.225057 sec +INFO: No Floating Point Exceptions have been reported + 18,266,994,357 cycles # 2.932 GHz + 44,980,008,303 instructions # 2.46 insn per cycle + 6.230608513 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 397) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.030131e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.362311e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.362311e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.795560 sec -INFO: No Floating Point Exceptions have been reported - 8,571,133,086 cycles:u # 3.057 GHz (74.89%) - 37,424,004 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.92%) - 1,114,870,979 stalled-cycles-backend:u # 13.01% backend cycles idle (74.93%) - 22,409,821,442 instructions:u # 2.61 insn per cycle - # 0.05 stalled cycles per insn (75.05%) - 2.870522320 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1874) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.255829e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.437463e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.437463e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.162961 sec +INFO: No Floating Point Exceptions have been reported + 9,315,618,309 cycles # 2.941 GHz + 22,235,168,853 instructions # 2.39 insn per cycle + 3.168519289 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1935) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.334828e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.876181e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.876181e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.600692 sec -INFO: No Floating Point Exceptions have been reported - 7,927,308,224 cycles:u # 3.039 GHz (74.88%) - 41,121,417 stalled-cycles-frontend:u # 0.52% frontend cycles idle (74.88%) - 1,892,251,873 stalled-cycles-backend:u # 23.87% backend cycles idle (74.96%) - 15,423,363,489 instructions:u # 1.95 insn per cycle - # 0.12 stalled cycles per insn (75.11%) - 2.681605545 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2501) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.414375e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.703911e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.703911e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.977811 sec +INFO: No Floating Point Exceptions have been reported + 8,430,687,956 cycles # 2.827 GHz + 15,749,443,583 instructions # 1.87 insn per cycle + 2.983247205 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2540) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.463260e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.781163e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.781163e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.924999 sec +INFO: No Floating Point Exceptions have been reported + 8,268,651,321 cycles # 2.823 GHz + 15,583,986,651 instructions # 1.88 insn per cycle + 2.930392056 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2438) (512y: 10) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.442819e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.745195e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.745195e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.947951 sec +INFO: No Floating Point Exceptions have been reported + 6,669,419,569 cycles # 2.259 GHz + 12,841,335,089 instructions # 1.93 insn per cycle + 2.953404356 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1669) (512y: 16) (512z: 1427) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052564145764E-002 +Relative difference = 1.9988585667912256e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 7d72e2393f..438f6c4f2f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_17:56:26 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:43:08 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.417071e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.141548e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.358712e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 -TOTAL : 0.395915 sec -INFO: No Floating Point Exceptions have been reported - 1,005,260,750 cycles:u # 2.443 GHz (74.54%) - 2,432,216 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.04%) - 5,446,718 stalled-cycles-backend:u # 0.54% backend cycles idle (74.90%) - 2,051,415,012 instructions:u # 2.04 insn per cycle - # 0.00 stalled cycles per insn (74.29%) - 0.454850741 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.248809e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.661013e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
8.608831e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.584963 sec +INFO: No Floating Point Exceptions have been reported + 2,341,003,807 cycles # 2.871 GHz + 3,637,581,249 instructions # 1.55 insn per cycle + 0.872273356 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.168008e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.710339e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.710339e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 3.581583 sec -INFO: No Floating Point Exceptions have been reported - 11,796,627,952 cycles:u # 3.287 GHz (74.92%) - 38,901,660 stalled-cycles-frontend:u # 0.33% frontend cycles idle (75.01%) - 1,791,140,796 stalled-cycles-backend:u # 15.18% backend cycles idle (75.03%) - 37,547,770,471 instructions:u # 3.18 insn per cycle - # 0.05 stalled cycles per insn (75.03%) - 3.593660266 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 705) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.610499e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.089750e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.089750e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 4.278573 sec +INFO: No Floating Point Exceptions have been reported + 12,205,449,516 cycles # 2.850 GHz + 32,295,858,353 instructions # 2.65 insn per cycle + 4.284066796 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 290) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039543819614E-002 -Relative difference = 3.5561191488957804e-08 +Avg ME (F77/C++) = 1.2828039840314887E-002 +Relative difference = 1.244813035273009e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] 
[inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.868157e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.114515e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.114515e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.310192 sec -INFO: No Floating Point Exceptions have been reported - 7,377,944,439 cycles:u # 3.184 GHz (74.69%) - 40,246,631 stalled-cycles-frontend:u # 0.55% frontend cycles idle (74.77%) - 212,420,701 stalled-cycles-backend:u # 2.88% backend cycles idle (75.08%) - 18,444,683,713 instructions:u # 2.50 insn per cycle - # 0.01 stalled cycles per insn (75.14%) - 2.321918385 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2784) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.650495e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.446725e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.446725e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.750577 sec +INFO: No Floating Point Exceptions have been reported + 8,071,356,692 cycles # 2.929 GHz + 18,687,842,971 instructions # 2.32 insn per cycle + 2.756173554 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1534) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039283704129E-002 +Relative difference = 5.583829420356249e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.852910e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.891146e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.891146e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.311762 sec -INFO: No Floating Point Exceptions have been reported - 7,383,263,785 cycles:u # 3.184 GHz (74.86%) - 43,163,961 stalled-cycles-frontend:u # 0.58% frontend cycles idle (74.84%) - 844,232,815 stalled-cycles-backend:u # 11.43% backend cycles idle (74.86%) - 14,155,277,183 instructions:u # 1.92 insn per cycle - # 0.06 stalled cycles per insn (75.00%) - 2.323712564 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4304) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.785833e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.615036e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.615036e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.627067 sec +INFO: No Floating Point Exceptions have been reported + 7,450,918,918 cycles # 2.831 GHz + 14,249,285,643 instructions # 1.91 insn per cycle + 2.632635594 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2234) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053244447801E-002 +Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.828862e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.718189e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.718189e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.597300 sec +INFO: No Floating Point Exceptions have been reported + 7,335,966,912 cycles # 2.820 GHz + 13,949,163,288 instructions # 1.90 insn per cycle + 2.602858413 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2087) (512y: 3) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053244447801E-002 +Relative difference = 2.5291823782248813e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.491639e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.833175e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.833175e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.900511 sec +INFO: No Floating Point Exceptions have been reported + 6,563,891,996 cycles # 2.259 GHz + 13,436,075,613 instructions # 2.05 insn per cycle + 2.906157600 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2073) (512y: 1) (512z: 1201) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052562326775E-002 +Relative difference = 1.997440588685788e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 62e2a08489..2bd01da79a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_17:56:38 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:43:31 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.521380e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.682628e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.929767e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 -TOTAL : 0.395671 sec -INFO: No Floating Point Exceptions have been reported - 984,202,112 cycles:u # 2.388 GHz (75.12%) - 2,551,855 stalled-cycles-frontend:u # 0.26% frontend cycles idle (73.43%) - 8,367,573 stalled-cycles-backend:u # 0.85% backend cycles idle (74.31%) - 1,973,107,317 instructions:u # 2.00 insn per cycle - # 0.00 stalled cycles per insn (75.73%) - 0.452530501 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.260194e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.691839e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
8.932675e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.585616 sec +INFO: No Floating Point Exceptions have been reported + 2,337,485,665 cycles # 2.875 GHz + 3,652,863,320 instructions # 1.56 insn per cycle + 0.871732359 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.049505e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.245124e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.245124e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 2.745841 sec -INFO: No Floating Point Exceptions have been reported - 8,844,760,560 cycles:u # 3.213 GHz (75.01%) - 36,071,629 stalled-cycles-frontend:u # 0.41% frontend cycles idle (75.01%) - 30,726,186 stalled-cycles-backend:u # 0.35% backend cycles idle (75.01%) - 28,561,504,774 instructions:u # 3.23 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 2.757605366 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.208067e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.235106e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.235106e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.219491 sec +INFO: No Floating Point Exceptions have been reported + 9,405,085,609 cycles # 2.917 GHz + 25,703,807,777 instructions # 2.73 insn per cycle + 3.224847546 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 243) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039838495897E-002 +Relative difference = 1.2589928273811243e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] 
[inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.237815e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.103867e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.103867e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.165380 sec -INFO: No Floating Point Exceptions have been reported - 6,859,752,406 cycles:u # 3.158 GHz (74.99%) - 37,632,318 stalled-cycles-frontend:u # 0.55% frontend cycles idle (74.96%) - 31,490,359 stalled-cycles-backend:u # 0.46% backend cycles idle (74.96%) - 16,583,709,220 instructions:u # 2.42 insn per cycle - # 0.00 stalled cycles per insn (74.96%) - 2.177299928 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2423) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.972603e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.428852e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.428852e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.491785 sec +INFO: No Floating Point Exceptions have been reported + 7,313,494,275 cycles # 2.930 GHz + 16,767,205,281 instructions # 2.29 insn per cycle + 2.497135576 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1311) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.055916e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.396389e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.396389e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.226900 sec -INFO: No Floating Point Exceptions have been reported - 7,078,990,378 cycles:u # 3.169 GHz (74.96%) - 42,150,010 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.93%) - 690,079,947 stalled-cycles-backend:u # 9.75% backend cycles idle (74.93%) - 13,524,313,161 instructions:u # 1.91 insn per cycle - # 0.05 stalled cycles per insn (74.93%) - 2.238496994 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3983) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.941057e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.047750e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.047750e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.511970 sec +INFO: No Floating Point Exceptions have been reported + 7,127,612,921 cycles # 2.833 GHz + 13,657,719,583 instructions # 1.92 insn per cycle + 2.517264213 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2067) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053349949187E-002 -Relative difference = 2.611425108340261e-07 +Avg ME (F77/C++) = 1.2828053220800939E-002 +Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.994854e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.186874e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.186874e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.472132 sec +INFO: No Floating Point Exceptions have been reported + 7,033,406,697 cycles # 2.840 GHz + 13,451,133,295 instructions # 1.91 insn per cycle + 2.477643200 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1935) (512y: 7) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053220800939E-002 +Relative difference = 2.5107486628541925e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.610829e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.126124e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.126124e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.783843 sec +INFO: No Floating Point Exceptions have been reported + 6,358,284,694 cycles # 2.280 GHz + 13,173,247,957 instructions # 2.07 insn per cycle + 2.789438831 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2039) (512y: 2) (512z: 1081) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052536860923E-002 +Relative difference = 1.977588895209662e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 3a3e5d3344..041f4e9efd 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_17:12:47 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:10:34 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.207835e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.857313e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.975718e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.562002 sec -INFO: No Floating Point Exceptions have been reported - 1,414,994,194 cycles:u # 2.507 GHz (75.09%) - 2,475,501 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.82%) - 6,282,643 stalled-cycles-backend:u # 0.44% backend cycles idle (75.76%) - 2,186,124,848 instructions:u # 1.54 insn per cycle - # 0.00 stalled cycles per insn (75.94%) - 0.726771283 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.877042e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.647728e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.852998e+08 ) sec^-1 +MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.677390 sec +INFO: No Floating Point Exceptions have been reported + 2,627,954,813 cycles # 2.876 GHz + 4,055,520,615 instructions # 1.54 insn per cycle + 0.972709824 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590281E-002 -Relative difference = 7.67145406542181e-09 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.326045e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.520190e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.520190e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.516829 sec -INFO: No Floating Point Exceptions have been reported - 17,688,420,445 cycles:u # 3.199 GHz (75.04%) - 49,553,059 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.99%) - 144,397,969 stalled-cycles-backend:u # 0.82% backend cycles idle (74.97%) - 47,436,113,566 instructions:u # 2.68 insn per cycle - # 0.00 stalled cycles per insn (74.99%) - 5.615037353 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 454) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.020205e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.187124e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.187124e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.577075 sec +INFO: No Floating Point Exceptions have been reported + 19,371,933,844 cycles # 2.944 GHz + 46,278,733,907 instructions # 2.39 insn per cycle + 6.582537613 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.960845e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.461824e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.461824e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.988117 sec -INFO: No Floating Point Exceptions have been reported - 12,462,936,271 cycles:u # 3.115 GHz (74.95%) - 49,873,898 stalled-cycles-frontend:u # 0.40% frontend cycles idle (75.01%) - 1,140,737,344 stalled-cycles-backend:u # 9.15% backend cycles idle (75.05%) - 31,401,629,715 instructions:u # 2.52 insn per cycle - # 0.04 stalled cycles per insn (75.01%) - 4.108231429 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1704) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.635520e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.155996e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.155996e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.262104 sec +INFO: No Floating Point Exceptions have been reported + 12,531,950,606 cycles # 2.937 GHz + 31,465,132,198 instructions # 2.51 insn per cycle + 4.267832274 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1731) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.603945e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.497171e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.497171e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.218211 sec -INFO: No Floating Point Exceptions have been reported - 9,777,394,640 cycles:u # 3.043 GHz (75.02%) - 53,114,088 stalled-cycles-frontend:u # 0.54% frontend cycles idle (75.10%) - 278,323,224 stalled-cycles-backend:u # 2.85% backend cycles idle (75.10%) - 19,375,783,973 instructions:u # 1.98 insn per cycle - # 0.01 stalled cycles per insn (75.10%) - 3.314291243 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2054) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.976062e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.756066e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.756066e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.592683 sec +INFO: No Floating Point Exceptions have been reported + 10,114,837,946 cycles # 2.812 GHz + 19,479,113,850 instructions # 1.93 insn per cycle + 3.598394582 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2045) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.011048e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.815376e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.815376e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.535103 sec +INFO: No Floating Point Exceptions have been reported + 9,996,837,570 cycles # 2.824 GHz + 19,291,566,393 instructions # 1.93 insn per cycle + 3.540686440 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1799) (512y: 188) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.782393e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.383775e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.383775e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.939537 sec +INFO: No Floating Point Exceptions have been reported + 8,379,017,732 cycles # 2.125 GHz + 15,108,594,606 instructions # 1.80 insn per cycle + 3.945372714 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 966) (512y: 154) (512z: 1330) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index 60b29b29bf..63e5511d98 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-18_17:13:05 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:11:04 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.524426e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.496465e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.642073e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.533662 sec -INFO: No Floating Point Exceptions have been reported - 1,327,312,195 cycles:u # 2.382 GHz (75.23%) - 2,544,054 stalled-cycles-frontend:u # 0.19% frontend cycles idle (76.56%) - 6,691,894 stalled-cycles-backend:u # 0.50% backend cycles idle (76.41%) - 2,218,110,852 instructions:u # 1.67 insn per cycle - # 0.00 stalled cycles per insn (74.79%) - 0.711069615 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.941580e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.659467e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.829628e+08 ) sec^-1 +MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.674748 sec +INFO: No Floating Point Exceptions have been reported + 2,621,919,128 cycles # 2.880 GHz + 4,081,332,751 instructions # 1.56 insn per cycle + 0.969735396 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590284E-002 -Relative difference = 7.67145379496374e-09 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.320736e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.517152e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.517152e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.542087 sec -INFO: No Floating Point Exceptions have been reported - 17,673,617,486 cycles:u # 3.182 GHz (74.96%) - 49,446,270 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.95%) - 788,224,990 stalled-cycles-backend:u # 4.46% backend cycles idle (74.96%) - 46,948,710,948 instructions:u # 2.66 insn per cycle - # 0.02 stalled cycles per insn (75.04%) - 5.631949639 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 471) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.022324e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.188868e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.188868e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.564992 sec +INFO: No Floating Point Exceptions have been reported + 19,266,332,416 cycles # 2.933 GHz + 46,212,690,278 instructions # 2.40 insn per cycle + 6.570664425 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.087278e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.649501e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.649501e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.767539 sec -INFO: No Floating Point Exceptions have been reported - 12,175,180,167 cycles:u # 3.221 GHz (75.03%) - 49,757,175 stalled-cycles-frontend:u # 0.41% frontend cycles idle (75.03%) - 322,071,355 stalled-cycles-backend:u # 2.65% backend cycles idle (75.03%) - 31,096,570,929 instructions:u # 2.55 insn per cycle - # 0.01 stalled cycles per insn (75.03%) - 3.866707373 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1654) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.631635e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.147723e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.147723e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.273260 sec +INFO: No Floating Point Exceptions have been reported + 12,565,193,084 cycles # 2.937 GHz + 31,464,303,429 instructions # 2.50 insn per cycle + 4.278983280 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1724) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.775239e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.738743e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.738743e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.006678 sec -INFO: No Floating Point Exceptions have been reported - 9,682,545,011 cycles:u # 3.207 GHz (75.02%) - 51,386,803 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.90%) - 648,242,102 stalled-cycles-backend:u # 6.69% backend cycles idle (74.86%) - 19,228,837,022 instructions:u # 1.99 insn per cycle - # 0.03 stalled cycles per insn (74.99%) - 3.117508436 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2008) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.965569e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.737055e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.737055e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.608853 sec +INFO: No Floating Point Exceptions have been reported + 10,149,451,908 cycles # 2.809 GHz + 19,494,245,478 instructions # 1.92 insn per cycle + 3.614638314 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2036) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.020584e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.826510e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.826510e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.523442 sec +INFO: No Floating Point Exceptions have been reported + 9,922,226,767 cycles # 2.813 GHz + 19,194,396,105 instructions # 1.93 insn per cycle + 3.529032291 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1766) (512y: 191) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.850816e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.505094e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.505094e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.808968 sec +INFO: No Floating Point Exceptions have been reported + 8,221,926,837 cycles # 2.156 GHz + 14,966,457,412 instructions # 1.82 insn per cycle + 3.814643788 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 959) (512y: 155) (512z: 1296) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index ab6dc5f81d..d77862b8c7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_17:13:22 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:11:34 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.934931e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.471034e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.491919e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.419804 sec -INFO: No Floating Point Exceptions have been reported - 996,714,034 cycles:u # 2.341 GHz (76.67%) - 2,409,644 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.26%) - 6,076,059 stalled-cycles-backend:u # 0.61% backend cycles idle (74.79%) - 1,539,514,328 instructions:u # 1.54 insn per cycle - # 0.00 stalled cycles per insn (74.46%) - 0.546995956 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.432691e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.350673e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001727e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 
+TOTAL : 0.536474 sec +INFO: No Floating Point Exceptions have been reported + 2,210,506,804 cycles # 2.873 GHz + 3,172,337,100 instructions # 1.44 insn per cycle + 0.829286366 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 
2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.565193e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.630775e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.630775e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.274033 sec -INFO: No Floating Point Exceptions have been reported - 14,472,479,383 cycles:u # 3.376 GHz (75.09%) - 8,913,402 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.02%) - 3,647,031,319 stalled-cycles-backend:u # 25.20% backend cycles idle (75.00%) - 45,489,617,307 instructions:u # 3.14 insn per cycle - # 0.08 stalled cycles per insn (75.00%) - 4.346418121 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] 
(23) = ( 1.830003e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.876741e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.876741e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.837147 sec +INFO: No Floating Point Exceptions have been reported + 17,232,906,357 cycles # 2.950 GHz + 45,930,941,627 instructions # 2.67 insn per cycle + 5.842851386 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 
Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.282132e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.475021e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.475021e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.634395 sec -INFO: No Floating Point Exceptions have been reported - 8,806,052,324 cycles:u # 3.326 GHz (74.93%) - 7,897,470 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.95%) - 2,700,862,590 stalled-cycles-backend:u # 30.67% backend cycles idle (74.95%) - 27,875,711,410 instructions:u # 3.17 insn per cycle - # 0.10 stalled cycles per insn (74.93%) - 2.706017261 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.213968e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.373677e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.373677e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.369805 sec +INFO: No Floating Point Exceptions have been reported + 9,944,028,092 
cycles # 2.947 GHz + 27,848,243,801 instructions # 2.80 insn per cycle + 3.375396234 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.238804e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.769999e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.769999e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.630385 sec -INFO: No Floating Point Exceptions have been reported - 5,391,944,056 cycles:u # 3.281 GHz (74.52%) - 8,358,752 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.82%) - 121,036,170 stalled-cycles-backend:u # 2.24% backend cycles idle (75.06%) - 12,304,338,508 instructions:u # 2.28 insn per cycle - # 0.01 stalled cycles per insn (75.18%) - 1.717953566 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.005348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.393032e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.393032e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.203017 sec +INFO: No Floating Point Exceptions have been reported + 6,092,356,881 cycles # 2.759 GHz + 12,580,147,933 instructions # 2.06 insn per cycle + 2.208781826 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.533405e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.010418e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.010418e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.002945 sec +INFO: No Floating Point Exceptions have been reported + 5,570,120,100 cycles # 2.774 GHz + 12,019,792,186 instructions # 2.16 insn per cycle + 2.008867487 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.539179e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.725857e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.725857e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.069457 sec +INFO: No Floating Point Exceptions have been reported + 5,709,813,977 cycles # 1.857 GHz + 8,292,916,903 instructions # 1.45 insn per cycle + 3.075340516 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 0f2ff73fb0..ac7eb7abb8 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,54 +1,77 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' 
HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_18:06:19 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:52:59 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.938623e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.960064e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.960064e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.244964 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,763,093,175 cycles:u # 2.951 GHz (74.98%) - 21,833,946 stalled-cycles-frontend:u # 0.58% frontend cycles idle (74.92%) - 1,143,029,876 stalled-cycles-backend:u # 30.37% backend cycles idle (74.71%) - 3,908,529,958 instructions:u # 1.04 insn per cycle - # 0.29 stalled cycles per insn (74.72%) - 1.316004936 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.492890e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.985153e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.985153e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.825573 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,089,630,348 cycles # 2.884 GHz + 4,704,003,879 instructions # 1.52 insn per cycle + 1.129956624 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -56,36 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 
3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.558947e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.636556e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.636556e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.370599 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 14,698,313,294 cycles:u # 3.347 GHz (74.93%) - 7,113,957 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.02%) - 3,662,135,115 stalled-cycles-backend:u # 24.92% backend cycles idle (75.04%) - 45,571,381,654 instructions:u # 3.10 insn per cycle - # 0.08 stalled cycles per insn (75.06%) - 4.396217494 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) 
= ( 1.810470e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.856841e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.856841e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.979891 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 17,636,224,981 cycles # 2.947 GHz + 46,002,491,255 instructions # 2.61 insn per cycle + 5.986641580 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -93,36 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.249503e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.443162e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.443162e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.741321 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 9,003,374,826 cycles:u # 3.261 GHz (74.86%) - 8,607,866 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.82%) - 2,678,543,116 stalled-cycles-backend:u # 29.75% backend cycles idle (74.97%) - 27,795,856,321 instructions:u # 3.09 insn per cycle - # 0.10 stalled cycles per insn (75.09%) - 2.766131127 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.162709e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.318081e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.318081e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.508343 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,301,947,786 cycles # 2.931 GHz + 28,031,926,381 instructions # 2.72 insn per cycle + 3.516023780 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -130,36 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.176793e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.686997e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.686997e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.730140 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,523,660,055 cycles:u # 3.156 GHz (74.91%) - 7,829,261 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.87%) - 124,702,269 stalled-cycles-backend:u # 2.26% backend cycles idle (74.87%) - 12,548,851,516 instructions:u # 2.27 insn per cycle - # 0.01 stalled cycles per insn (74.87%) - 1.755657916 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.911481e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.286736e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.286736e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.328474 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,480,879,664 cycles # 2.775 GHz + 12,869,228,758 instructions # 1.99 insn per cycle + 2.336129053 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -167,16 +183,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.391900e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.844641e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.844641e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.137350 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,935,186,233 cycles # 2.768 GHz + 12,309,185,637 instructions # 2.07 insn per cycle + 2.144981542 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.478793e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.660239e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.660239e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.208412 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,086,695,352 cycles # 1.893 GHz + 8,539,357,346 instructions # 1.40 insn per cycle + 3.215882461 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 4213b36877..43a1422029 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_18:11:43 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:05:13 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.937991e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.510467e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.532029e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.294862e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.316742e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.978216e+07 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.108240 sec -INFO: No Floating Point Exceptions have been reported - 3,284,871,136 cycles:u # 2.959 GHz (74.89%) - 10,892,051 stalled-cycles-frontend:u # 0.33% frontend cycles idle (74.92%) - 1,128,730,546 stalled-cycles-backend:u # 34.36% backend cycles idle (74.43%) - 3,078,096,208 instructions:u # 0.94 insn per cycle - # 0.37 stalled cycles per insn (74.73%) - 1.167524787 seconds time elapsed +TOTAL : 0.632097 sec +INFO: No 
Floating Point Exceptions have been reported + 2,509,027,611 cycles # 2.881 GHz + 3,623,648,413 instructions # 1.44 insn per cycle + 0.928416005 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 
-Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.580637e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.643728e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.643728e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.823170e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.870043e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.870043e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.248465 sec -INFO: No Floating Point Exceptions have been reported - 14,464,635,032 cycles:u # 3.396 GHz (75.03%) - 8,722,841 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.02%) - 3,642,256,779 stalled-cycles-backend:u # 25.18% backend cycles idle (75.02%) - 45,553,728,102 instructions:u # 3.15 insn per cycle - # 0.08 stalled cycles per insn (75.02%) - 4.261541371 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.922002 sec +INFO: No Floating Point Exceptions have been reported + 17,445,049,338 cycles # 2.943 GHz + 45,950,504,380 instructions # 2.63 insn per cycle + 5.927754556 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.199915e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.383224e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.383224e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.206424e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.368310e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.368310e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.678911 sec -INFO: No Floating Point Exceptions have been reported - 9,019,928,220 cycles:u # 3.353 GHz (75.03%) - 9,261,975 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.03%) - 2,696,360,899 stalled-cycles-backend:u # 29.89% backend cycles idle (75.02%) - 27,740,633,647 instructions:u # 3.08 insn per cycle - # 0.10 stalled cycles per insn (75.02%) - 2.692188557 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.440040 sec +INFO: No Floating Point Exceptions have been reported + 10,134,263,801 cycles # 2.942 GHz + 27,846,437,463 instructions # 2.75 insn per cycle + 3.446069209 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.285364e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.823103e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.823103e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.972916e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.355947e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.355947e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.624112 sec -INFO: No Floating Point Exceptions have been reported - 5,332,952,658 cycles:u # 3.261 GHz (75.00%) - 8,043,121 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.05%) - 105,121,534 stalled-cycles-backend:u # 1.97% backend cycles idle (75.05%) - 12,312,123,293 instructions:u # 2.31 insn per cycle - # 0.01 stalled cycles per insn (75.05%) - 1.637347721 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) +TOTAL : 2.278924 sec +INFO: No Floating Point Exceptions have been reported + 6,293,574,887 cycles # 2.756 GHz + 12,563,410,868 instructions # 2.00 insn per cycle + 2.284852020 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) 
= ( 5.484228e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.952695e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.952695e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.082886 sec +INFO: No Floating Point Exceptions have been reported + 5,796,540,715 cycles # 2.776 GHz + 11,970,685,605 instructions # 2.07 insn per cycle + 2.088838177 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.533537e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.719277e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.719277e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.139756 sec +INFO: No Floating Point Exceptions have been reported + 5,897,468,368 cycles # 1.875 GHz + 8,242,833,828 instructions # 1.40 insn per cycle + 3.145931095 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index ac29a8b745..06cd2419c8 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,50 +1,70 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_18:09:54 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:59:34 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.803304e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.473400e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.494865e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.202087 sec -INFO: No Floating Point Exceptions have been reported - 3,619,095,250 cycles:u # 2.982 GHz (75.32%) - 21,746,240 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.63%) - 1,112,138,546 stalled-cycles-backend:u # 30.73% backend cycles idle (75.31%) - 3,845,495,935 instructions:u # 1.06 insn per cycle - # 0.29 stalled cycles per insn (74.68%) - 1.265347938 seconds time elapsed 
+EvtsPerSec[Rmb+ME] (23) = ( 5.799214e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.349130e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.974876e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.720471 sec +INFO: No Floating Point Exceptions have been reported + 2,751,096,444 cycles # 2.885 GHz + 4,339,626,159 instructions # 1.58 insn per cycle + 1.011336555 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -52,34 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] 
-Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.555412e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.618727e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.618727e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.291123 sec -INFO: No Floating Point Exceptions have been reported - 14,460,345,395 cycles:u # 3.361 GHz (74.92%) - 9,128,081 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.90%) - 3,671,357,888 stalled-cycles-backend:u # 25.39% backend cycles idle (75.00%) - 45,435,832,585 instructions:u # 3.14 insn per cycle - # 0.08 stalled cycles per insn (75.08%) - 4.304537317 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.816637e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.863801e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.863801e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.879265 sec +INFO: No Floating Point Exceptions have been reported + 17,261,479,917 cycles # 2.934 GHz + 45,935,121,768 instructions # 2.66 insn per cycle + 5.884988360 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -87,34 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': 
SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.287633e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.481280e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.481280e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.630521 sec -INFO: No Floating Point Exceptions have been reported - 8,794,046,271 cycles:u # 3.330 GHz (74.89%) - 7,806,877 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.89%) - 2,687,178,672 stalled-cycles-backend:u # 30.56% backend cycles idle (74.86%) - 27,867,079,455 instructions:u # 3.17 insn per cycle - # 0.10 stalled cycles per insn (74.98%) - 2.643204398 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.202828e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.362707e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.362707e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.382024 sec +INFO: No Floating Point Exceptions have been reported + 9,945,427,320 cycles # 2.936 GHz + 27,847,352,314 instructions # 2.80 insn per cycle + 3.387994978 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -122,34 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.253032e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.782725e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.782725e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.628819 sec -INFO: No Floating Point Exceptions have been reported - 5,370,533,081 cycles:u # 3.276 GHz (74.73%) - 8,299,845 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.98%) - 95,736,243 stalled-cycles-backend:u # 1.78% backend cycles idle (75.12%) - 12,295,237,724 instructions:u # 2.29 insn per cycle - # 0.01 stalled cycles per insn (75.12%) - 1.641761550 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.949448e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.331919e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.331919e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.228024 sec +INFO: No Floating Point Exceptions have been reported + 6,117,137,090 cycles # 2.739 GHz + 12,580,569,234 instructions # 2.06 insn per cycle + 2.234097878 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -157,16 +170,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.342003e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
5.785664e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.785664e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.072519 sec +INFO: No Floating Point Exceptions have been reported + 5,591,470,515 cycles # 2.691 GHz + 12,020,476,993 instructions # 2.15 insn per cycle + 2.078517041 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.530876e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.717281e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.717281e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.077997 sec +INFO: No Floating Point Exceptions have been reported + 5,702,073,376 cycles # 1.850 GHz + 8,294,780,221 instructions # 1.45 insn per cycle + 3.083993360 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 7b8f0592a7..a4f203143e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_17:13:35 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:11:59 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.966399e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.504545e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.526121e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.411007 sec -INFO: No Floating Point Exceptions have been reported - 955,029,396 cycles:u # 2.188 GHz (74.48%) - 2,514,775 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.03%) - 5,592,491 stalled-cycles-backend:u # 0.59% backend cycles idle (74.83%) - 1,547,862,874 instructions:u # 1.62 insn per cycle - # 0.00 stalled cycles per insn (75.44%) - 0.575051879 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.820817e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.978279e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.339111e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 
+TOTAL : 0.700005 sec +INFO: No Floating Point Exceptions have been reported + 2,762,648,255 cycles # 2.857 GHz + 3,086,101,973 instructions # 1.12 insn per cycle + 1.026825767 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 
2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.508523e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.572646e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.572646e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.371058 sec -INFO: No Floating Point Exceptions have been reported - 14,128,475,872 cycles:u # 3.222 GHz (75.00%) - 9,065,194 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) - 289,306,543 stalled-cycles-backend:u # 2.05% backend cycles idle (75.01%) - 44,427,266,143 instructions:u # 3.14 insn per cycle - # 0.01 stalled cycles per insn (75.01%) - 4.459993433 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] 
(23) = ( 1.875216e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.924982e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.924982e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.700425 sec +INFO: No Floating Point Exceptions have been reported + 16,757,702,666 cycles # 2.937 GHz + 44,923,641,547 instructions # 2.68 insn per cycle + 5.706326125 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 
Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.430399e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.647944e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.647944e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.562854 sec -INFO: No Floating Point Exceptions have been reported - 8,369,699,777 cycles:u # 3.249 GHz (74.89%) - 9,515,612 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.87%) - 671,847,691 stalled-cycles-backend:u # 8.03% backend cycles idle (74.85%) - 26,789,411,251 instructions:u # 3.20 insn per cycle - # 0.03 stalled cycles per insn (74.96%) - 2.664751352 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2266) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.370762e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.546946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.546946e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.219225 sec +INFO: No Floating Point Exceptions have been reported + 9,494,791,570 
cycles # 2.945 GHz + 26,687,379,503 instructions # 2.81 insn per cycle + 3.225069589 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2327) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.526556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.948550e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.948550e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.788592 sec -INFO: No Floating Point Exceptions have been reported - 5,941,360,097 cycles:u # 3.298 GHz (74.70%) - 9,310,250 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.92%) - 1,408,571,969 stalled-cycles-backend:u # 23.71% backend cycles idle (75.14%) - 14,136,602,484 instructions:u # 2.38 insn per cycle - # 0.10 stalled cycles per insn (75.14%) - 1.845020820 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.607569e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.929909e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.929909e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.383333 sec +INFO: No Floating Point Exceptions have been reported + 6,604,949,302 cycles # 2.766 GHz + 14,119,001,234 instructions # 2.14 insn per cycle + 2.388928721 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2711) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.803756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.157173e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.157173e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.289999 sec +INFO: No Floating Point Exceptions have been reported + 6,348,634,731 cycles # 2.767 GHz + 13,715,767,912 instructions # 2.16 insn per cycle + 2.295499005 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 298) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.387276e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.557456e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.557456e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.203296 sec +INFO: No Floating Point Exceptions have been reported + 5,911,433,799 cycles # 1.843 GHz + 10,058,967,230 instructions # 1.70 insn per cycle + 
3.209029605 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1273) (512y: 208) (512z: 1988) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index a9445d3e36..797e37fdb1 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' 
HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_17:56:49 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:43:53 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.959257e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.482289e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.503835e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.401016 sec -INFO: No Floating Point Exceptions have been reported - 982,588,415 cycles:u # 2.335 GHz (74.23%) - 2,633,099 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.51%) - 7,514,039 stalled-cycles-backend:u # 0.76% backend cycles idle (74.87%) - 1,557,120,082 
instructions:u # 1.58 insn per cycle - # 0.00 stalled cycles per insn (75.65%) - 0.460989527 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.310192e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.359217e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.986325e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.536645 sec +INFO: No Floating Point Exceptions have been reported + 2,216,199,851 cycles # 2.870 GHz + 3,159,776,582 instructions # 1.43 insn per cycle + 0.831121874 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.938860e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.021462e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.021462e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.751082 sec -INFO: No Floating Point Exceptions have been reported - 12,709,283,737 cycles:u # 3.378 GHz (74.93%) - 8,556,908 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.93%) - 4,229,942,104 stalled-cycles-backend:u # 33.28% backend cycles idle (74.93%) - 35,277,140,070 instructions:u # 2.78 insn per cycle - # 0.12 stalled cycles per insn (74.95%) - 3.767075266 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 885) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.421869e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.505515e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.505515e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.438528 sec +INFO: No Floating Point Exceptions have been reported + 13,015,204,187 cycles # 2.929 GHz + 34,341,759,533 instructions # 2.64 insn per cycle + 4.444441151 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.584654e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.803796e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.803796e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.471773 sec -INFO: No Floating Point Exceptions have been reported - 8,280,641,404 cycles:u # 3.335 GHz (74.90%) - 9,308,773 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.90%) - 1,538,273,950 stalled-cycles-backend:u # 18.58% backend cycles idle (74.91%) - 21,677,236,050 instructions:u # 2.62 insn per cycle - # 0.07 stalled cycles per insn (75.08%) - 2.545937944 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2458) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.982901e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.119934e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.119934e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.624625 sec +INFO: No Floating Point Exceptions have been reported + 10,679,803,279 cycles # 2.942 GHz + 24,245,188,333 instructions # 2.27 insn per cycle + 3.630600501 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2610) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.688766e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.149893e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.149893e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.751078 sec -INFO: No Floating Point Exceptions have been reported - 5,754,161,838 cycles:u # 3.264 GHz (74.89%) - 8,027,521 stalled-cycles-frontend:u # 0.14% frontend cycles idle (75.04%) - 1,733,341,378 stalled-cycles-backend:u # 30.12% backend cycles idle (75.04%) - 12,004,181,265 instructions:u # 2.09 insn per cycle - # 0.14 stalled cycles per insn (75.04%) - 1.767279218 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3012) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.555816e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.876140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.876140e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.410266 sec +INFO: No Floating Point Exceptions have been reported + 6,676,895,845 cycles # 2.765 GHz + 12,404,391,789 instructions # 1.86 insn per cycle + 2.415872101 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3115) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.932497e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.306284e+05 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.306284e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.233356 sec +INFO: No Floating Point Exceptions have been reported + 6,172,218,152 cycles # 2.758 GHz + 11,544,853,425 instructions # 1.87 insn per cycle + 2.239017897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2644) (512y: 239) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.760390e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.970863e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.970863e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.895519 sec +INFO: No Floating Point Exceptions have been reported + 5,386,476,820 cycles # 1.857 GHz + 9,291,001,680 instructions # 1.72 insn per cycle + 2.901312030 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2099) (512y: 282) (512z: 1958) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 5f12e2193a..af0c8fa098 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_17:57:01 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:44:17 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.981203e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.539251e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.562219e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.393786 sec -INFO: No Floating Point Exceptions have been reported - 981,751,203 cycles:u # 2.364 GHz (75.65%) - 2,492,719 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.20%) - 6,301,960 stalled-cycles-backend:u # 0.64% backend cycles idle (76.56%) - 1,593,527,761 instructions:u # 1.62 insn per cycle - # 0.00 stalled cycles per insn (74.94%) - 0.457304440 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.294016e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.195619e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.822974e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 
+TOTAL : 0.537359 sec +INFO: No Floating Point Exceptions have been reported + 2,212,895,393 cycles # 2.861 GHz + 3,167,520,059 instructions # 1.43 insn per cycle + 0.832101772 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 
2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.634816e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.764509e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.764509e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.066427 sec -INFO: No Floating Point Exceptions have been reported - 10,350,634,816 cycles:u # 3.363 GHz (74.92%) - 9,207,673 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.04%) - 15,199,377 stalled-cycles-backend:u # 0.15% backend cycles idle (75.05%) - 34,607,039,252 instructions:u # 3.34 insn per cycle - # 0.00 stalled cycles per insn (75.05%) - 3.083652986 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 408) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] 
(23) = ( 2.565164e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.657330e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.657330e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.194388 sec +INFO: No Floating Point Exceptions have been reported + 12,320,787,698 cycles # 2.934 GHz + 34,912,998,062 instructions # 2.83 insn per cycle + 4.200192046 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 430) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 
Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.984118e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.247895e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.247895e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.287179 sec -INFO: No Floating Point Exceptions have been reported - 7,639,182,299 cycles:u # 3.324 GHz (75.02%) - 9,475,513 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.95%) - 1,914,637,161 stalled-cycles-backend:u # 25.06% backend cycles idle (74.94%) - 21,134,965,478 instructions:u # 2.77 insn per cycle - # 0.09 stalled cycles per insn (74.97%) - 2.303285175 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2073) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.989812e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.127480e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.127480e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.616035 sec +INFO: No Floating Point Exceptions have been reported + 10,626,604,482 
cycles # 2.935 GHz + 23,338,496,545 instructions # 2.20 insn per cycle + 3.621790672 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2378) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.310818e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.845191e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.845191e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.615040 sec -INFO: No Floating Point Exceptions have been reported - 5,309,461,499 cycles:u # 3.264 GHz (74.72%) - 8,747,447 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.92%) - 1,032,040,214 stalled-cycles-backend:u # 19.44% backend cycles idle (74.96%) - 11,420,261,490 instructions:u # 2.15 insn per cycle - # 0.09 stalled cycles per insn (74.96%) - 1.631528490 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2332) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.054894e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.447738e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.447738e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.181056 sec +INFO: No Floating Point Exceptions have been reported + 6,051,059,717 cycles # 2.768 GHz + 11,860,809,289 instructions # 1.96 insn per cycle + 2.186772408 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2468) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.028106e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.414371e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.414371e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.192869 sec +INFO: No Floating Point Exceptions have been reported + 6,064,121,206 cycles # 2.759 GHz + 11,098,432,522 instructions # 1.83 insn per cycle + 2.198761953 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2098) (512y: 174) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.876416e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.107845e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.107845e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.813537 sec +INFO: No Floating Point Exceptions have been reported + 5,237,838,464 cycles # 1.858 GHz + 9,015,066,552 instructions # 1.72 insn per cycle + 2.819357375 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1632) (512y: 208) (512z: 1567) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 843b1434d8..0cce370026 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' 
-HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_17:13:48 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:12:25 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.003234e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.159945e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.183678e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 -TOTAL : 0.343481 sec -INFO: No Floating Point Exceptions have been reported - 800,538,383 cycles:u # 2.218 GHz (73.03%) - 2,346,963 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.25%) - 6,715,861 stalled-cycles-backend:u # 0.84% backend cycles idle (74.73%) - 1,500,212,452 
instructions:u # 1.87 insn per cycle - # 0.00 stalled cycles per insn (74.46%) - 0.497087625 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.285654e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.744544e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.855248e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.489810 sec +INFO: No Floating Point Exceptions have been reported + 2,058,086,051 cycles # 2.871 GHz + 2,937,778,801 instructions # 1.43 insn per cycle + 0.774500335 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.920924e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.010747e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.010747e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.736629 sec -INFO: No Floating Point Exceptions have been reported - 12,772,799,800 cycles:u # 3.410 GHz (74.97%) - 6,438,869 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.02%) - 38,957,089 stalled-cycles-backend:u # 0.31% backend cycles idle (75.02%) - 45,402,799,814 instructions:u # 3.55 insn per cycle - # 0.00 stalled cycles per insn (75.02%) - 3.842668704 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.924099e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.978298e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.978298e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.535123 sec +INFO: No Floating Point Exceptions have been reported + 16,260,554,497 cycles # 2.935 GHz + 45,332,637,380 instructions # 2.79 insn per cycle + 5.540566072 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198337657377 -Relative difference = 8.193642726087208e-08 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.004887e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.369552e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.369552e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.893575 sec -INFO: No Floating Point Exceptions have been reported - 6,329,966,260 cycles:u # 3.327 GHz (74.89%) - 5,942,225 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.85%) - 2,806,250,644 stalled-cycles-backend:u # 44.33% backend cycles idle (74.81%) - 17,176,219,337 instructions:u # 2.71 insn per cycle - # 0.16 stalled cycles per insn (74.92%) - 2.089375786 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.537932e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.874791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.874791e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.397466 sec +INFO: No Floating Point Exceptions have been reported + 7,088,165,806 cycles # 2.951 GHz + 17,790,594,363 instructions # 2.51 insn per cycle + 2.403188687 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198775378987 -Relative difference = 6.036124513188701e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.176093e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.317775e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.317775e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.037790 sec -INFO: No Floating Point Exceptions have been reported - 3,374,015,291 cycles:u # 3.224 GHz (75.06%) - 6,964,114 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.84%) - 1,080,311,721 stalled-cycles-backend:u # 32.02% backend cycles idle (74.78%) - 8,107,806,529 instructions:u # 2.40 insn per cycle - # 0.13 stalled cycles per insn (74.84%) - 1.139350959 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.392634e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.540507e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.540507e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.334609 sec +INFO: No Floating Point Exceptions have been reported + 3,736,094,091 cycles # 2.789 GHz + 8,261,313,611 instructions # 2.21 insn per cycle + 1.340132908 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186282850802 -Relative difference = 1.8321738890139266e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.862239e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.012505e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.012505e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.268798 sec +INFO: No Floating Point Exceptions have been reported + 3,543,869,427 cycles # 2.783 GHz + 7,911,503,214 instructions # 2.23 insn per cycle + 1.274261347 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.491068e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.141806e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.141806e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.701721 sec +INFO: No Floating Point Exceptions have been reported + 3,270,419,298 cycles # 1.917 GHz + 6,095,745,028 instructions # 1.86 insn per cycle + 1.707211646 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index bcb4d19bce..5e7502fc17 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,54 +1,77 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_18:06:32 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:53:26 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.807509e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.983639e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.983639e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 -TOTAL : 1.162866 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,552,364,031 cycles:u # 2.994 GHz (74.52%) - 37,548,960 stalled-cycles-frontend:u # 1.06% frontend cycles idle (75.08%) - 1,127,077,852 stalled-cycles-backend:u # 31.73% backend cycles idle (74.93%) - 3,817,045,743 instructions:u # 1.07 insn per cycle - # 0.30 stalled cycles per insn (75.31%) - 1.224889095 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.022210e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.414163e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.414163e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.683925 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,644,974,332 cycles # 2.886 GHz + 4,089,078,726 instructions # 1.55 insn per cycle + 0.974029218 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -56,36 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 
+Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.950552e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.034859e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.034859e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.742410 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 12,696,850,045 cycles:u # 3.381 GHz (74.90%) - 6,935,878 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.98%) - 28,501,944 stalled-cycles-backend:u # 0.22% backend cycles idle (75.08%) - 45,502,459,168 instructions:u # 3.58 insn per cycle - # 0.00 stalled cycles per insn (75.08%) - 3.837658112 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) 
+EvtsPerSec[Rmb+ME] (23) = ( 1.927229e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.981708e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.981708e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.572760 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 16,435,796,229 cycles # 2.946 GHz + 45,376,812,282 instructions # 2.76 insn per cycle + 5.580128034 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -93,36 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198337657377 -Relative difference = 8.193642726087208e-08 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.203067e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.592213e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.592213e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.883234 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,265,430,020 cycles:u # 3.305 GHz (74.76%) - 6,791,933 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.83%) - 2,590,936,624 stalled-cycles-backend:u # 41.35% backend cycles idle (75.04%) - 17,234,116,973 instructions:u # 2.75 insn per cycle - # 0.15 stalled cycles per insn (75.10%) - 1.900512135 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.483217e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.814609e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.814609e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.475211 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 7,297,746,086 cycles # 2.941 GHz + 18,073,033,530 instructions # 2.48 insn per cycle + 2.482430942 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -130,36 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198775378987 -Relative difference = 6.036124513188701e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.171349e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.310974e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.310974e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.085681 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,491,792,899 cycles:u # 3.179 GHz (74.52%) - 7,299,229 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.85%) - 1,090,281,496 stalled-cycles-backend:u # 31.22% backend cycles idle (75.21%) - 8,272,960,237 instructions:u # 2.37 insn per cycle - # 0.13 stalled cycles per insn (75.24%) - 1.103698634 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.199525e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.300829e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.300829e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.415446 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,953,896,804 cycles # 2.781 GHz + 8,500,905,843 instructions # 2.15 insn per cycle + 1.422523843 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -167,16 +183,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186282850802 -Relative difference = 1.8321738890139266e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.608107e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.919736e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.919736e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.364816 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,828,677,143 cycles # 2.793 GHz + 8,155,232,689 instructions # 2.13 insn per cycle + 1.371531073 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.398900e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.033073e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.033073e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.777483 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,485,580,348 cycles # 1.954 GHz + 6,352,386,091 instructions # 1.82 insn per cycle + 1.784705241 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index ae32bb5481..7b3bdcf221 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_18:11:56 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:05:38 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.809931e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.224979e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.251448e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.080340e+00 +- 3.470037e-03 ) GeV^0 -TOTAL : 1.024141 sec -INFO: No Floating Point Exceptions have been reported - 3,125,129,101 cycles:u # 2.998 GHz (74.69%) - 27,700,211 stalled-cycles-frontend:u # 0.89% frontend cycles idle (74.76%) - 1,124,092,602 stalled-cycles-backend:u # 35.97% backend cycles idle (74.74%) - 2,948,330,703 instructions:u # 0.94 insn per cycle - # 0.38 stalled cycles per insn (74.80%) - 1.077623925 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.256953e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.707995e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.827629e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079446e+00 
+- 3.403306e-03 ) GeV^0 +TOTAL : 0.581626 sec +INFO: No Floating Point Exceptions have been reported + 2,320,591,922 cycles # 2.873 GHz + 3,370,044,879 instructions # 1.45 insn per cycle + 0.865525838 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.937740e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.020684e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.020684e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.923451e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.977569e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.977569e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.718315 sec -INFO: No Floating Point Exceptions have been reported - 12,679,943,630 cycles:u # 3.404 GHz (74.81%) - 7,147,017 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) - 16,502,581 stalled-cycles-backend:u # 0.13% backend cycles idle (75.09%) - 45,503,865,325 instructions:u # 3.59 insn per cycle - # 0.00 stalled cycles per insn (75.09%) - 3.727751012 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.596105 sec +INFO: No Floating Point Exceptions have been reported + 16,423,082,806 cycles # 2.932 GHz + 45,361,162,230 instructions # 2.76 insn per cycle + 5.601871750 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198337657377 -Relative difference = 8.193642726087208e-08 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.012329e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.382116e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.382116e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.890925 sec -INFO: No Floating Point Exceptions have been reported - 6,370,824,183 cycles:u # 3.357 GHz (74.61%) - 5,952,086 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.99%) - 2,785,178,986 stalled-cycles-backend:u # 43.72% backend cycles idle (75.13%) - 17,103,920,086 instructions:u # 2.68 insn per cycle - # 0.16 stalled cycles per insn (75.13%) - 1.899977692 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.510624e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.845954e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.845954e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 2.467999 sec +INFO: No Floating Point Exceptions have been reported + 7,259,263,758 cycles # 2.936 GHz + 17,804,964,488 instructions # 2.45 insn per cycle + 2.473643333 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198775378987 -Relative difference = 6.036124513188701e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.186993e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.330461e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.330461e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.031162 sec -INFO: No Floating Point Exceptions have been reported - 3,393,358,695 cycles:u # 3.269 GHz (74.26%) - 7,282,823 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.43%) - 1,075,923,160 stalled-cycles-backend:u # 31.71% backend cycles idle (75.14%) - 8,043,926,213 instructions:u # 2.37 insn per cycle - # 0.13 stalled cycles per insn (75.35%) - 1.040189831 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.271097e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.411462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.411462e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.412437 sec +INFO: No Floating Point Exceptions have been reported + 3,908,301,423 cycles # 2.757 GHz + 8,246,550,739 instructions # 2.11 insn per cycle + 1.418307229 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186282850802 -Relative difference = 1.8321738890139266e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.660773e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.926852e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.926852e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.356729 sec +INFO: No Floating Point Exceptions have been reported + 3,755,878,291 cycles # 2.759 GHz + 7,864,539,547 instructions # 2.09 insn per cycle + 1.362169016 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.442823e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.089629e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.089629e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.773182 sec +INFO: No Floating Point Exceptions have been reported + 3,435,797,893 cycles # 1.932 GHz + 6,046,565,657 instructions # 1.76 insn per cycle + 1.778888357 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 47959b4036..8649c65a6a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,50 +1,70 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_18:10:07 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:00:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.618825e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.211694e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.237729e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 -TOTAL : 1.124995 sec -INFO: No Floating Point Exceptions have been reported - 3,476,336,954 cycles:u # 3.026 GHz (74.85%) - 36,627,328 stalled-cycles-frontend:u # 1.05% frontend cycles idle (74.54%) - 1,114,403,962 stalled-cycles-backend:u # 32.06% backend cycles idle (74.63%) - 3,788,721,035 instructions:u # 1.09 insn per cycle - # 0.29 stalled cycles per insn (74.82%) - 1.181447151 seconds time elapsed 
+EvtsPerSec[Rmb+ME] (23) = ( 8.491816e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.706264e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.829121e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.638763 sec +INFO: No Floating Point Exceptions have been reported + 2,486,309,752 cycles # 2.846 GHz + 3,832,853,586 instructions # 1.54 insn per cycle + 0.933212094 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -52,34 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.970848e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.055504e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.055504e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.676706 sec -INFO: No Floating Point Exceptions have been reported - 12,570,831,222 cycles:u # 3.412 GHz (75.03%) - 6,771,135 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.03%) - 11,872,257 stalled-cycles-backend:u # 0.09% backend cycles idle (75.03%) - 45,451,149,469 instructions:u # 3.62 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 3.686013589 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.926255e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.980755e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.980755e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.530359 sec +INFO: No Floating Point Exceptions have been reported + 16,260,744,493 cycles # 2.938 GHz + 45,331,881,354 instructions # 2.79 insn per cycle + 5.536242796 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -87,34 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198337657377 -Relative difference = 8.193642726087208e-08 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.021461e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.391294e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.391294e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.888421 sec -INFO: No Floating Point Exceptions have been reported - 6,350,127,223 cycles:u # 3.350 GHz (75.00%) - 6,579,219 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.10%) - 2,776,635,796 stalled-cycles-backend:u # 43.73% backend cycles idle (75.10%) - 17,072,514,219 instructions:u # 2.69 insn per cycle - # 0.16 stalled cycles per insn (75.10%) - 1.897472156 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.514119e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.847574e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.847574e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.409823 sec +INFO: No Floating Point Exceptions have been reported + 7,091,224,967 cycles # 2.937 GHz + 17,790,807,442 instructions # 2.51 insn per cycle + 2.415653910 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -122,34 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198775378987 -Relative difference = 6.036124513188701e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.189325e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.334414e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.334414e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.031970 sec -INFO: No Floating Point Exceptions have been reported - 3,372,633,035 cycles:u # 3.246 GHz (74.65%) - 7,160,636 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.62%) - 1,078,144,489 stalled-cycles-backend:u # 31.97% backend cycles idle (74.82%) - 8,046,994,912 instructions:u # 2.39 insn per cycle - # 0.13 stalled cycles per insn (75.24%) - 1.040907882 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.315319e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.466327e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.466327e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.347295 sec +INFO: No Floating Point Exceptions have been reported + 3,748,135,716 cycles # 2.771 GHz + 8,261,548,625 instructions # 2.20 insn per cycle + 1.353086220 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -157,16 +170,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186282850802 -Relative difference = 1.8321738890139266e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.772831e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.005617e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005617e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.281889 sec +INFO: No Floating Point Exceptions have been reported + 3,559,044,656 cycles # 2.766 GHz + 7,911,466,674 instructions # 2.22 insn per cycle + 1.287610992 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.412498e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.103906e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.103906e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.722151 sec +INFO: No Floating Point Exceptions have been reported + 3,304,024,823 cycles # 1.914 GHz + 6,099,911,719 instructions # 1.85 insn per cycle + 1.727529111 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 2513d26a17..fbbd4d7aad 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_17:13:59 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:12:45 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.843140e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.140200e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.163746e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 -TOTAL : 0.345480 sec -INFO: No Floating Point Exceptions have been reported - 769,099,133 cycles:u # 2.131 GHz (74.97%) - 2,311,642 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.67%) - 12,497,784 stalled-cycles-backend:u # 1.62% backend cycles idle (75.45%) - 1,511,553,566 instructions:u # 1.97 insn per cycle - # 0.01 stalled cycles per insn (74.94%) - 0.492189685 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.269829e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.739721e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.856627e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 
+TOTAL : 0.490633 sec +INFO: No Floating Point Exceptions have been reported + 2,054,923,204 cycles # 2.868 GHz + 2,821,409,154 instructions # 1.37 insn per cycle + 0.774891828 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.965127e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.051164e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.051164e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.685667 sec -INFO: No Floating Point Exceptions have been reported - 12,497,981,355 cycles:u # 3.383 GHz (74.99%) - 6,231,610 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.93%) - 1,604,793,472 stalled-cycles-backend:u # 12.84% backend cycles idle (74.90%) - 44,383,694,468 instructions:u # 3.55 insn per cycle - # 0.04 stalled cycles per insn (74.95%) - 3.738273158 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.972061e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.029032e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.029032e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.402943 sec +INFO: No Floating Point Exceptions have been reported + 15,938,200,378 cycles # 2.947 GHz + 44,441,419,092 instructions # 2.79 insn per cycle + 5.408620560 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198337657377 -Relative difference = 8.193642726087208e-08 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.443597e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.024448e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.024448e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.558930 sec -INFO: No Floating Point Exceptions have been reported - 5,185,372,749 cycles:u # 3.308 GHz (75.01%) - 6,775,953 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.00%) - 1,487,280,863 stalled-cycles-backend:u # 28.68% backend cycles idle (75.00%) - 16,897,152,513 instructions:u # 3.26 insn per cycle - # 0.09 stalled cycles per insn (75.00%) - 1.626354383 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2753) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.316687e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.790003e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.790003e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.058425 sec +INFO: No Floating Point Exceptions have been reported + 6,073,730,384 cycles # 2.944 GHz + 17,080,831,031 instructions # 2.81 insn per cycle + 2.063919735 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2863) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198775378987 -Relative difference = 6.036124513188701e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.732403e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.488075e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.488075e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.348095 sec -INFO: No Floating Point Exceptions have been reported - 4,434,654,710 cycles:u # 3.269 GHz (74.70%) - 6,657,210 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.88%) - 1,733,238,057 stalled-cycles-backend:u # 39.08% backend cycles idle (75.17%) - 10,216,913,126 instructions:u # 2.30 insn per cycle - # 0.17 stalled cycles per insn (75.24%) - 1.426007773 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3885) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.040290e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.607212e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.607212e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.821129 sec +INFO: No Floating Point Exceptions have been reported + 5,028,060,974 cycles # 2.754 GHz + 10,226,327,467 instructions # 2.03 insn per cycle + 1.826739648 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3907) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186282850802 -Relative difference = 1.8321738890139266e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.109926e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.690770e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.690770e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.802334 sec +INFO: No Floating Point Exceptions have been reported + 4,967,999,007 cycles # 2.749 GHz + 9,996,248,012 instructions # 2.01 insn per cycle + 1.807786513 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3806) (512y: 2) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.589191e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.908384e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.908384e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.373161 sec +INFO: No Floating Point Exceptions have been reported + 4,379,373,712 cycles # 1.842 GHz + 8,445,292,719 instructions # 1.93 insn per cycle + 2.379096717 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2746) (512y: 4) (512z: 2754) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 118dd06c8f..b94de9fae6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_17:57:11 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:44:41 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.932103e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.175689e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.200034e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 -TOTAL : 0.336331 sec -INFO: No Floating Point Exceptions have been reported - 843,912,844 cycles:u # 2.404 GHz (72.87%) - 2,445,193 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.96%) - 7,612,619 stalled-cycles-backend:u # 0.90% backend cycles idle (76.93%) - 1,427,522,026 instructions:u # 1.69 insn per cycle - # 0.01 stalled cycles per insn (75.89%) - 0.392680618 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.109159e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.754036e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.870271e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 
+TOTAL : 0.491488 sec +INFO: No Floating Point Exceptions have been reported + 2,077,663,912 cycles # 2.873 GHz + 2,918,599,943 instructions # 1.40 insn per cycle + 0.780254295 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.694570e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.826993e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.826993e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.986758 sec -INFO: No Floating Point Exceptions have been reported - 10,133,396,715 cycles:u # 3.384 GHz (74.90%) - 6,350,756 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.78%) - 1,086,346,025 stalled-cycles-backend:u # 10.72% backend cycles idle (74.84%) - 34,556,648,168 instructions:u # 3.41 insn per cycle - # 0.03 stalled cycles per insn (75.09%) - 2.999304468 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 762) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.511694e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.603843e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.603843e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.261367 sec +INFO: No Floating Point Exceptions have been reported + 12,578,636,437 cycles # 2.949 GHz + 34,608,642,396 instructions # 2.75 insn per cycle + 4.266948834 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199088536203 -Relative difference = 4.4925808981097166e-08 +Avg ME (F77/C++) = 2.0288199094356969 +Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.421806e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.989977e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.989977e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.558597 sec -INFO: No Floating Point Exceptions have been reported - 5,216,610,830 cycles:u # 3.331 GHz (75.02%) - 6,672,763 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.97%) - 1,881,496,588 stalled-cycles-backend:u # 36.07% backend cycles idle (74.97%) - 14,512,314,160 instructions:u # 2.78 insn per cycle - # 0.13 stalled cycles per insn (74.97%) - 1.570709178 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2947) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.245729e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.707902e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.707902e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.085904 sec +INFO: No Floating Point Exceptions have been reported + 6,139,006,311 cycles # 2.936 GHz + 14,814,345,795 instructions # 2.41 insn per cycle + 2.091585873 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2975) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198769558221 -Relative difference = 6.06481491495597e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193755550310 +Relative difference = 1.8511017053446366e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.666171e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.059188e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.059188e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.230905 sec -INFO: No Floating Point Exceptions have been reported - 4,071,672,464 cycles:u # 3.288 GHz (74.81%) - 7,713,984 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.81%) - 1,584,620,055 stalled-cycles-backend:u # 38.92% backend cycles idle (74.87%) - 8,963,815,100 instructions:u # 2.20 insn per cycle - # 0.18 stalled cycles per insn (74.87%) - 1.243036539 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4429) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.217326e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.053698e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.053698e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.539150 sec +INFO: No Floating Point Exceptions have been reported + 4,266,849,527 cycles # 2.764 GHz + 9,068,527,132 instructions # 2.13 insn per cycle + 1.544604329 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4456) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186736870557 -Relative difference = 1.6083886449260875e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182069780305 +Relative difference = 1.0201902325125583e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.341390e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.190395e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.190395e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.513835 sec +INFO: No Floating Point Exceptions have been reported + 4,209,677,652 cycles # 2.772 GHz + 8,658,962,407 instructions # 2.06 insn per cycle + 1.519314933 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4233) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182069780305 +Relative difference = 1.0201902325125583e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.363197e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.802509e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.802509e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.041800 sec +INFO: No Floating Point Exceptions have been reported + 3,848,539,052 cycles # 1.880 GHz + 7,805,686,420 instructions # 2.03 insn per cycle + 2.047559874 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4273) (512y: 0) (512z: 2558) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183246739209 +Relative difference = 1.6003107281264138e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index b7d1150cb7..647db6d470 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_17:57:21 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:45:01 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.869011e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.134218e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.157950e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 -TOTAL : 0.339311 sec -INFO: No Floating Point Exceptions have been reported - 825,835,247 cycles:u # 2.323 GHz (75.40%) - 2,448,227 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.37%) - 13,017,603 stalled-cycles-backend:u # 1.58% backend cycles idle (75.25%) - 1,457,584,553 instructions:u # 1.76 insn per cycle - # 0.01 stalled cycles per insn (76.31%) - 0.400194249 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.181121e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.754734e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.875454e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 
+TOTAL : 0.493098 sec +INFO: No Floating Point Exceptions have been reported + 2,068,560,161 cycles # 2.866 GHz + 2,913,404,401 instructions # 1.41 insn per cycle + 0.778994585 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.954979e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.108161e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.108161e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.797007 sec -INFO: No Floating Point Exceptions have been reported - 9,501,070,094 cycles:u # 3.388 GHz (74.92%) - 6,219,947 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.90%) - 9,308,030 stalled-cycles-backend:u # 0.10% backend cycles idle (74.92%) - 34,666,442,750 instructions:u # 3.65 insn per cycle - # 0.00 stalled cycles per insn (74.92%) - 2.809775897 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 434) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.673571e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.779018e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.779018e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.007457 sec +INFO: No Floating Point Exceptions have been reported + 11,821,622,506 cycles # 2.947 GHz + 35,077,213,703 instructions # 2.97 insn per cycle + 4.012923546 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199088536203 -Relative difference = 4.4925808981097166e-08 +Avg ME (F77/C++) = 2.0288199094356969 +Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.127608e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.834460e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.834460e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.435085 sec -INFO: No Floating Point Exceptions have been reported - 4,759,046,807 cycles:u # 3.300 GHz (75.05%) - 7,247,357 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.04%) - 1,159,148,260 stalled-cycles-backend:u # 24.36% backend cycles idle (75.04%) - 13,935,870,455 instructions:u # 2.93 insn per cycle - # 0.08 stalled cycles per insn (75.05%) - 1.446829797 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2467) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.446071e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.947640e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.947640e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.011399 sec +INFO: No Floating Point Exceptions have been reported + 5,918,531,500 cycles # 2.935 GHz + 14,532,054,201 instructions # 2.46 insn per cycle + 2.017166521 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2569) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198892958462 -Relative difference = 5.4565783974899003e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193583255634 +Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.020322e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.125599e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.125599e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.174617 sec -INFO: No Floating Point Exceptions have been reported - 3,873,061,459 cycles:u # 3.276 GHz (74.86%) - 7,352,600 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.65%) - 1,416,471,421 stalled-cycles-backend:u # 36.57% backend cycles idle (74.96%) - 8,561,369,225 instructions:u # 2.21 insn per cycle - # 0.17 stalled cycles per insn (74.96%) - 1.186707415 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3397) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.388337e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.293979e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.293979e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.508049 sec +INFO: No Floating Point Exceptions have been reported + 4,192,067,529 cycles # 2.771 GHz + 8,850,538,175 instructions # 2.11 insn per cycle + 1.513555792 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3552) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186836987734 -Relative difference = 1.559041129563128e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182107033208 +Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.539896e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.448863e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.448863e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.476824 sec +INFO: No Floating Point Exceptions have been reported + 4,124,218,335 cycles # 2.783 GHz + 8,408,510,612 instructions # 2.04 insn per cycle + 1.482399691 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3296) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182107033208 +Relative difference = 1.0385521077446488e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.510377e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.974414e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.974414e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.989253 sec +INFO: No Floating Point Exceptions have been reported + 3,785,582,278 cycles # 1.899 GHz + 7,698,584,647 instructions # 2.03 insn per cycle + 1.994773359 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 2110) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183204829693 +Relative difference = 1.5796536184903122e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 6faae54f7c..ac99bf7b60 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_17:14:10 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:13:07 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.932877e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.456336e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.477647e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.430186 sec -INFO: No Floating Point Exceptions have been reported - 1,011,551,860 cycles:u # 2.308 GHz (75.93%) - 2,474,973 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.46%) - 5,574,299 stalled-cycles-backend:u # 0.55% backend cycles idle (74.50%) - 1,495,779,097 instructions:u # 1.48 insn per cycle - # 0.00 stalled cycles per insn (74.63%) - 0.628695657 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.415407e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.358342e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002564e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 
+TOTAL : 0.531223 sec +INFO: No Floating Point Exceptions have been reported + 2,212,715,399 cycles # 2.883 GHz + 3,174,354,481 instructions # 1.43 insn per cycle + 0.824625337 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 
2.0288063423243869 -Relative difference = 3.241686434838304e-07 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.516895e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.579376e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.579376e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.354211 sec -INFO: No Floating Point Exceptions have been reported - 14,455,072,921 cycles:u # 3.309 GHz (75.00%) - 8,798,887 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) - 3,863,259,247 stalled-cycles-backend:u # 26.73% backend cycles idle (74.91%) - 45,859,438,536 instructions:u # 3.17 insn per cycle - # 0.08 stalled cycles per insn (74.95%) - 4.430543616 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] 
(23) = ( 1.812224e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.858502e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.858502e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.893700 sec +INFO: No Floating Point Exceptions have been reported + 17,384,515,155 cycles # 2.947 GHz + 46,085,827,160 instructions # 2.65 insn per cycle + 5.899425018 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 
Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.234560e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.423000e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.423000e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.661992 sec -INFO: No Floating Point Exceptions have been reported - 8,892,538,145 cycles:u # 3.324 GHz (74.91%) - 9,095,708 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.88%) - 2,749,417,047 stalled-cycles-backend:u # 30.92% backend cycles idle (75.03%) - 27,514,557,439 instructions:u # 3.09 insn per cycle - # 0.10 stalled cycles per insn (75.17%) - 2.736935695 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2518) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.230178e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.393068e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.393068e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.353996 sec +INFO: No Floating Point Exceptions have been reported + 9,906,776,741 
cycles # 2.949 GHz + 27,581,204,322 instructions # 2.78 insn per cycle + 3.359750594 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.387440e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.936802e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.936802e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.603726 sec -INFO: No Floating Point Exceptions have been reported - 5,224,190,372 cycles:u # 3.230 GHz (75.17%) - 9,719,986 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.12%) - 942,360,412 stalled-cycles-backend:u # 18.04% backend cycles idle (74.72%) - 12,352,759,131 instructions:u # 2.36 insn per cycle - # 0.08 stalled cycles per insn (74.60%) - 1.699215859 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2668) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.070803e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.467527e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.467527e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.175848 sec +INFO: No Floating Point Exceptions have been reported + 6,033,401,789 cycles # 2.767 GHz + 12,481,778,172 instructions # 2.07 insn per cycle + 2.181604261 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2773) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063930599014 -Relative difference = 2.9916108265801754e-07 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= 
+INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.576261e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.054850e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.054850e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.987931 sec +INFO: No Floating Point Exceptions have been reported + 5,526,359,959 cycles # 2.773 GHz + 11,919,157,674 instructions # 2.16 insn per cycle + 1.993761374 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2518) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.583667e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.773486e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.773486e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.034227 sec +INFO: No Floating Point Exceptions have been reported + 5,618,120,727 cycles # 1.849 GHz + 8,105,692,593 instructions # 1.44 insn per cycle + 3.040009315 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1862) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 2e1f89feb6..d60a3db604 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' 
-HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_17:14:23 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:13:32 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.981612e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.541037e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.563162e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.401933 sec -INFO: No Floating Point Exceptions have been reported - 965,281,867 cycles:u # 2.286 GHz (75.43%) - 2,505,651 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.44%) - 6,141,178 stalled-cycles-backend:u # 0.64% backend cycles idle (74.76%) - 1,610,457,772 
instructions:u # 1.67 insn per cycle - # 0.00 stalled cycles per insn (75.15%) - 0.585324236 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.391860e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.272095e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.937370e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.534370 sec +INFO: No Floating Point Exceptions have been reported + 2,212,045,639 cycles # 2.882 GHz + 3,154,512,029 instructions # 1.43 insn per cycle + 0.826500836 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243869 -Relative difference = 3.241686434838304e-07 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.566258e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.630147e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.630147e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.271754 sec -INFO: No Floating Point Exceptions have been reported - 14,501,888,548 cycles:u # 3.384 GHz (74.99%) - 8,976,778 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.99%) - 3,364,261,054 stalled-cycles-backend:u # 23.20% backend cycles idle (75.00%) - 44,578,588,606 instructions:u # 3.07 insn per cycle - # 0.08 stalled cycles per insn (75.00%) - 4.362082746 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 590) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.857330e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.905433e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.905433e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.752139 sec +INFO: No Floating Point Exceptions have been reported + 16,956,103,485 cycles # 2.946 GHz + 45,111,671,387 instructions # 2.66 insn per cycle + 5.757950281 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.583365e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.804007e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.804007e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.471505 sec -INFO: No Floating Point Exceptions have been reported - 8,278,088,324 cycles:u # 3.331 GHz (74.92%) - 9,533,878 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.92%) - 1,272,690,434 stalled-cycles-backend:u # 15.37% backend cycles idle (74.89%) - 26,382,503,426 instructions:u # 3.19 insn per cycle - # 0.05 stalled cycles per insn (75.05%) - 2.553064814 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2312) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.369201e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.545470e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.545470e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.220654 sec +INFO: No Floating Point Exceptions have been reported + 9,518,675,134 cycles # 2.951 GHz + 26,252,301,051 instructions # 2.76 insn per cycle + 3.226704286 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.441479e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.854446e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.854446e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.812364 sec -INFO: No Floating Point Exceptions have been reported - 5,984,800,024 cycles:u # 3.278 GHz (74.93%) - 8,242,587 stalled-cycles-frontend:u # 0.14% frontend cycles idle (75.03%) - 1,798,859,897 stalled-cycles-backend:u # 30.06% backend cycles idle (75.03%) - 13,980,239,803 instructions:u # 2.34 insn per cycle - # 0.13 stalled cycles per insn (75.03%) - 1.897920283 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2871) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.516544e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.830416e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.830416e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.429009 sec +INFO: No Floating Point Exceptions have been reported + 6,737,120,781 cycles # 2.769 GHz + 14,029,549,404 instructions # 2.08 insn per cycle + 2.434732608 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2896) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063930599014 -Relative difference = 2.9916108265801754e-07 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
+EvtsPerSec[Rmb+ME] (23) = ( 4.763106e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.113046e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.113046e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.308488 sec +INFO: No Floating Point Exceptions have been reported + 6,400,709,122 cycles # 2.767 GHz + 13,521,645,446 instructions # 2.11 insn per cycle + 2.314138282 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2535) (512y: 302) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.631126e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.827064e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.827064e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.996339 sec +INFO: No Floating Point Exceptions have been reported + 5,581,413,243 cycles # 1.860 GHz + 9,205,937,992 instructions # 1.65 insn per cycle + 3.002095742 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2060) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index ff8a6789a6..de5eca26a8 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-18_17:14:36 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:13:57 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.457022e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.560381e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.562252e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.563088 sec -INFO: No Floating Point Exceptions have been reported - 1,184,409,252 cycles:u # 2.674 GHz (74.37%) - 2,616,298 stalled-cycles-frontend:u # 0.22% frontend cycles idle (73.85%) - 8,712,145 stalled-cycles-backend:u # 0.74% backend cycles idle (73.91%) - 1,724,638,719 instructions:u # 1.46 insn per cycle - # 0.01 stalled cycles per insn (75.75%) - 0.700799232 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.672201e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.887935e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.992853e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 
) GeV^-2 +TOTAL : 0.477007 sec +INFO: No Floating Point Exceptions have been reported + 1,987,624,447 cycles # 2.874 GHz + 2,861,967,134 instructions # 1.44 insn per cycle + 0.751704376 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.606899e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.729548e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.732092e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.720731 sec -INFO: No Floating Point Exceptions have been reported - 2,032,976,036 cycles:u # 2.726 GHz (74.65%) - 2,410,844 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.42%) - 12,000,473 stalled-cycles-backend:u # 0.59% backend cycles idle (74.45%) - 2,481,165,172 instructions:u # 1.22 insn per cycle - # 0.00 stalled cycles per insn (74.92%) - 0.779185094 seconds time elapsed +EvtsPerSec[Rmb+ME] 
(23) = ( 1.044656e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.231568e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.242034e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.614374 sec +INFO: No Floating Point Exceptions have been reported + 2,464,089,898 cycles # 2.883 GHz + 3,693,413,015 instructions # 1.50 insn per cycle + 0.914175309 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg 
ME (F77/GPU) = 1.4131213684418644 -Relative difference = 4.469239991780462e-07 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.353659e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.368809e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.368809e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 4.910166 sec -INFO: No Floating Point Exceptions have been reported - 16,973,945,281 cycles:u # 3.455 GHz (74.93%) - 2,851,139 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.97%) - 3,581,679,834 stalled-cycles-backend:u # 21.10% backend cycles idle (75.05%) - 57,020,220,816 instructions:u # 3.36 insn per cycle - # 0.06 stalled cycles per insn (75.09%) - 4.985447360 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1294) (avx2: 0) (512y: 0) (512z: 
0) +EvtsPerSec[Rmb+ME] (23) = ( 2.435389e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.447579e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.447579e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.748690 sec +INFO: No Floating Point Exceptions have been reported + 19,905,580,584 cycles # 2.948 GHz + 59,914,464,179 instructions # 3.01 insn per cycle + 6.753011110 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432429 -Relative difference = 4.4692302371173303e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.536241e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.593971e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.593971e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.531297 sec -INFO: No Floating Point Exceptions have been reported - 8,802,208,640 cycles:u # 3.472 GHz (74.91%) - 2,137,896 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.02%) - 1,782,641,515 stalled-cycles-backend:u # 20.25% backend cycles idle (75.07%) - 29,959,209,250 instructions:u # 3.40 insn per cycle - # 0.06 stalled cycles per insn (75.07%) - 2.621443926 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4647) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) 
= ( 4.605126e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.648126e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.648126e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.577729 sec +INFO: No Floating Point Exceptions have been reported + 10,567,541,735 cycles # 2.951 GHz + 31,084,954,146 instructions # 2.94 insn per cycle + 3.582009862 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432429 -Relative difference = 4.4692302371173303e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.319296e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.343516e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.343516e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.273309 sec -INFO: No Floating Point Exceptions have been reported - 4,370,369,702 cycles:u # 3.440 GHz (74.82%) - 2,062,289 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.81%) - 1,128,447,471 stalled-cycles-backend:u # 25.82% backend cycles idle (74.86%) - 11,112,411,564 instructions:u # 2.54 insn per cycle - # 0.10 stalled cycles per insn (75.12%) - 1.369812125 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4251) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = 
( 9.119843e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.286275e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.286275e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.816628 sec +INFO: No Floating Point Exceptions have been reported + 5,009,875,098 cycles # 2.752 GHz + 11,404,863,740 instructions # 2.28 insn per cycle + 1.820981146 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.027376e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.048667e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.048667e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.614828 sec +INFO: No Floating Point Exceptions have been reported + 4,447,516,452 cycles # 2.748 GHz + 10,663,621,215 instructions # 2.40 insn per cycle + 1.619180273 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 92) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.153517e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.257338e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.257338e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.312792 sec +INFO: No Floating Point Exceptions have been 
reported + 4,128,948,366 cycles # 1.783 GHz + 5,970,641,302 instructions # 1.45 insn per cycle + 2.317202499 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1620) (512y: 94) (512z: 3577) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index ac5312cb18..9c43264546 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,77 +1,97 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-18_18:06:44 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:53:47 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.224846e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.543995e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.543995e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.582869 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,765,589,761 cycles:u # 2.931 GHz (75.25%) - 7,035,236 stalled-cycles-frontend:u # 0.40% frontend cycles idle (75.97%) - 261,985,210 stalled-cycles-backend:u # 14.84% backend cycles idle (75.82%) - 2,169,843,448 instructions:u # 1.23 insn per cycle - # 0.12 stalled cycles per insn (74.85%) - 0.635912960 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.507916e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.178599e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.178599e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.502648 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,068,761,834 cycles # 2.877 GHz + 3,090,755,102 instructions # 1.49 insn per cycle + 0.775689457 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.821635e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.643478e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.643478e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.948724e+03 +- 1.840727e+03 ) GeV^-2 -TOTAL : 1.347374 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,073,598,998 cycles:u # 2.950 GHz (75.06%) - 16,322,403 stalled-cycles-frontend:u # 0.40% frontend cycles idle (75.13%) - 834,923,477 stalled-cycles-backend:u # 20.50% backend cycles idle (74.95%) - 4,169,863,141 instructions:u # 1.02 insn per cycle - # 0.20 stalled cycles per insn (75.35%) - 1.419348394 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.673734e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.373672e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.373672e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.833395 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,144,886,808 cycles # 2.895 GHz + 5,022,532,373 instructions # 1.60 insn per cycle + 1.144806482 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -79,36 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418644 -Relative difference = 4.469239991780462e-07 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge 
OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.296074e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.310817e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.310817e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 4.999365 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 17,306,681,704 cycles:u # 3.460 GHz (74.91%) - 2,853,983 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.93%) - 3,674,045,388 stalled-cycles-backend:u # 21.23% backend cycles idle (75.01%) - 56,907,854,742 instructions:u # 3.29 insn per cycle - # 0.06 stalled cycles per insn (75.05%) - 5.007805214 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1294) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.430493e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.443016e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.443016e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.770345 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 19,935,799,744 cycles # 2.943 GHz + 59,921,717,219 instructions # 3.01 insn per cycle + 6.775096176 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -116,36 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432429 -Relative difference = 4.4692302371173303e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.510110e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.567162e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.567162e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.544813 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 8,813,377,561 cycles:u # 3.459 GHz (74.89%) - 1,975,694 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.89%) - 1,765,423,443 stalled-cycles-backend:u # 20.03% backend cycles idle (74.90%) - 29,975,613,508 instructions:u # 3.40 insn per cycle - # 0.06 stalled cycles per insn (75.01%) - 2.552904627 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4647) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.571029e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.615207e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.615207e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.613337 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,624,808,815 cycles # 2.938 GHz + 31,136,068,452 instructions # 2.93 insn per cycle + 3.618153867 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -153,36 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432429 -Relative difference = 4.4692302371173303e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.306142e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.330343e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.330343e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.282549 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,428,624,680 cycles:u # 3.444 GHz (74.75%) - 689,538 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.00%) - 1,170,114,158 stalled-cycles-backend:u # 26.42% backend cycles idle (75.12%) - 11,121,515,676 instructions:u # 2.51 insn per cycle - # 0.11 stalled cycles per insn (75.12%) - 1.290561471 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4251) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.976002e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.144750e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.144750e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.854528 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported 
+ 5,122,960,994 cycles # 2.757 GHz + 11,456,752,385 instructions # 2.24 insn per cycle + 1.859209871 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -190,16 +203,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.023623e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.045107e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.045107e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.629549 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,493,284,400 cycles # 2.751 GHz + 10,714,819,935 instructions # 2.38 insn per cycle + 1.634203375 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 92) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.121040e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.229108e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.229108e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.332216 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,174,771,858 cycles # 1.787 GHz + 6,010,349,590 instructions # 1.44 insn per cycle + 2.336931936 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1620) (512y: 94) (512z: 3577) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index f634fe824c..8cdcf50b56 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-18_17:14:50 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:14:23 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.453569e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.558130e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.560094e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.443988 sec -INFO: No Floating Point Exceptions have been reported - 1,154,023,417 cycles:u # 2.635 GHz (75.61%) - 2,680,026 stalled-cycles-frontend:u # 0.23% frontend cycles idle (76.04%) - 8,282,079 stalled-cycles-backend:u # 0.72% backend cycles idle (75.44%) - 1,744,767,704 instructions:u # 1.51 insn per cycle - # 0.00 stalled cycles per insn (74.67%) - 0.573796584 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.625266e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.900146e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.003656e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 
) GeV^-2 +TOTAL : 0.475988 sec +INFO: No Floating Point Exceptions have been reported + 1,977,726,110 cycles # 2.850 GHz + 2,827,901,574 instructions # 1.43 insn per cycle + 0.751347521 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.623090e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.749531e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.752290e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.731632 sec -INFO: No Floating Point Exceptions have been reported - 2,026,109,757 cycles:u # 2.737 GHz (74.27%) - 2,453,713 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.47%) - 7,820,498 stalled-cycles-backend:u # 0.39% backend cycles idle (75.65%) - 2,448,337,363 instructions:u # 1.21 insn per cycle - # 0.00 stalled cycles per insn (75.70%) - 0.794710009 seconds time elapsed +EvtsPerSec[Rmb+ME] 
(23) = ( 1.046576e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.234116e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.244538e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.611168 sec +INFO: No Floating Point Exceptions have been reported + 2,454,142,578 cycles # 2.886 GHz + 3,695,001,143 instructions # 1.51 insn per cycle + 0.909771724 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg 
ME (F77/GPU) = 1.4131213684418644 -Relative difference = 4.469239991780462e-07 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.496489e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.513469e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.513469e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 4.710710 sec -INFO: No Floating Point Exceptions have been reported - 16,372,965,323 cycles:u # 3.473 GHz (74.86%) - 2,465,696 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.95%) - 3,842,922,790 stalled-cycles-backend:u # 23.47% backend cycles idle (75.03%) - 56,492,878,028 instructions:u # 3.45 insn per cycle - # 0.07 stalled cycles per insn (75.06%) - 4.821352012 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 924) (avx2: 0) (512y: 0) (512z: 
0) +EvtsPerSec[Rmb+ME] (23) = ( 2.436838e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.448877e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.448877e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.744113 sec +INFO: No Floating Point Exceptions have been reported + 19,898,434,725 cycles # 2.949 GHz + 60,128,447,647 instructions # 3.02 insn per cycle + 6.748351399 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1322) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432427 -Relative difference = 4.4692302386886357e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.246723e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.299838e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.299838e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.647063 sec -INFO: No Floating Point Exceptions have been reported - 9,148,950,670 cycles:u # 3.451 GHz (75.10%) - 2,224,960 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.15%) - 2,631,107,871 stalled-cycles-backend:u # 28.76% backend cycles idle (75.03%) - 30,439,427,557 instructions:u # 3.33 insn per cycle - # 0.09 stalled cycles per insn (74.96%) - 2.794627265 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4697) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) 
= ( 4.649169e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.692956e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.692956e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.544209 sec +INFO: No Floating Point Exceptions have been reported + 10,481,283,758 cycles # 2.954 GHz + 30,686,827,574 instructions # 2.93 insn per cycle + 3.548515404 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5047) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432431 -Relative difference = 4.4692302355460254e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.211077e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.231611e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.231611e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.377740 sec -INFO: No Floating Point Exceptions have been reported - 4,740,821,780 cycles:u # 3.433 GHz (74.78%) - 2,040,865 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.02%) - 1,475,086,457 stalled-cycles-backend:u # 31.11% backend cycles idle (75.10%) - 11,727,325,539 instructions:u # 2.47 insn per cycle - # 0.13 stalled cycles per insn (75.10%) - 1.453655303 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4465) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = 
( 8.897572e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.058943e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.058943e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.861619 sec +INFO: No Floating Point Exceptions have been reported + 5,141,047,361 cycles # 2.756 GHz + 11,838,355,420 instructions # 2.30 insn per cycle + 1.866119668 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4746) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.640218e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.828831e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.828831e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.719787 sec +INFO: No Floating Point Exceptions have been reported + 4,732,734,719 cycles # 2.746 GHz + 11,163,471,114 instructions # 2.36 insn per cycle + 1.724312193 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4403) (512y: 246) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 
tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.072241e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.175446e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.175446e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.339175 sec +INFO: No Floating Point Exceptions 
have been reported + 4,159,319,454 cycles # 1.776 GHz + 6,222,343,045 instructions # 1.50 insn per cycle + 2.343565013 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1516) (512y: 139) (512z: 3679) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 5245312e04..b9aad18eeb 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-18_17:15:04 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:14:49 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.088422e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.576264e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.583915e+06 ) 
sec^-1 -MeanMatrixElemValue = ( 1.415273e+04 +- 1.288237e+04 ) GeV^-2 -TOTAL : 0.370776 sec -INFO: No Floating Point Exceptions have been reported - 948,898,570 cycles:u # 2.522 GHz (74.93%) - 2,404,849 stalled-cycles-frontend:u # 0.25% frontend cycles idle (76.45%) - 11,570,294 stalled-cycles-backend:u # 1.22% backend cycles idle (74.24%) - 1,473,357,888 instructions:u # 1.55 insn per cycle - # 0.01 stalled cycles per insn (75.76%) - 0.511369879 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.682161e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.012912e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.052707e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 +TOTAL : 0.457449 sec +INFO: No Floating Point Exceptions have been reported + 1,934,954,114 cycles # 2.865 GHz + 2,736,882,841 instructions # 1.41 insn per cycle + 0.732650423 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.899042e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.025660e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.027519e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.619625e+05 +- 1.611328e+05 ) GeV^-2 -TOTAL : 0.520560 sec -INFO: No Floating Point Exceptions have been reported - 1,344,404,243 cycles:u # 2.601 GHz (74.57%) - 2,658,037 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.26%) - 4,825,135 stalled-cycles-backend:u # 0.36% backend cycles idle (74.44%) - 1,788,812,845 instructions:u # 1.33 insn per cycle - # 0.00 stalled cycles per insn (75.11%) - 0.579967163 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.683155e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.385425e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.426136e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 +TOTAL : 0.507459 sec +INFO: No Floating Point Exceptions have been reported + 2,120,407,613 cycles # 2.883 GHz + 3,024,448,335 instructions # 1.43 insn per cycle + 0.792985016 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 1.412410e+00 -Avg ME (F77/GPU) = 1.4131674300257941 -Relative difference = 0.0005362678158567296 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412607e+00 +Avg ME (F77/GPU) = 1.4132214305330990 +Relative difference = 0.0004349621183379836 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.674362e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.693480e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.693480e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 -TOTAL : 4.481835 sec -INFO: No Floating Point Exceptions have been reported - 15,527,623,702 cycles:u # 3.462 GHz (74.93%) - 1,358,327 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.03%) - 2,474,889,742 stalled-cycles-backend:u # 15.94% backend cycles idle (75.03%) - 56,588,613,692 instructions:u # 3.64 insn per cycle - # 0.04 stalled cycles per insn (75.03%) - 4.569830402 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1190) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.506015e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.518972e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.518972e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.556943 sec +INFO: No Floating Point Exceptions have been reported + 19,264,218,294 cycles # 2.937 GHz + 59,614,798,383 instructions # 3.09 insn per cycle + 6.560956742 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129859809517598 -Relative difference = 1.3480841507557613e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129949096991936 +Relative difference = 6.390737857384068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.142338e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.160857e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.160857e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 -TOTAL : 1.456926 sec -INFO: No Floating Point Exceptions have been reported - 5,046,759,545 cycles:u # 3.454 GHz (74.96%) - 1,785,779 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.85%) - 1,556,603,635 stalled-cycles-backend:u # 30.84% backend cycles idle (74.82%) - 16,277,524,788 instructions:u # 3.23 insn per cycle - # 0.10 stalled cycles per insn (74.82%) - 1.557952037 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5124) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.070356e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.207853e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.207853e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 2.048077 sec +INFO: No Floating Point Exceptions have been reported + 6,023,874,049 cycles # 2.936 GHz + 17,061,893,848 instructions # 2.83 insn per cycle + 2.052246672 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5856) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129857731430207 -Relative difference = 1.6055147002442227e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129954647353316 +Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.446494e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.531677e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.531677e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 -TOTAL : 0.692125 sec -INFO: No Floating Point Exceptions have been reported - 2,383,806,406 cycles:u # 3.426 GHz (74.81%) - 1,772,559 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.72%) - 741,384,713 stalled-cycles-backend:u # 31.10% backend cycles idle (74.72%) - 6,038,789,305 instructions:u # 2.53 insn per cycle - # 0.12 stalled cycles per insn (74.64%) - 0.790224849 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4734) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.743575e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.804848e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.804848e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.957876 sec +INFO: No Floating Point Exceptions have been reported + 2,640,887,772 cycles # 2.747 GHz + 6,187,336,173 instructions # 2.34 insn per cycle + 0.962119669 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133162101620087 -Relative difference = 1.4870135814264702e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.915124e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.989470e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989470e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.873685 sec +INFO: No Floating Point Exceptions have been reported + 2,402,820,009 cycles # 2.739 GHz + 5,790,162,566 instructions # 2.41 insn per cycle + 0.877828237 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4906) (512y: 37) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.453255e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.496895e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.496895e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.147590 sec +INFO: No Floating Point Exceptions have 
been reported + 2,076,037,431 cycles # 1.804 GHz + 3,391,394,333 instructions # 1.63 insn per cycle + 1.151886126 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 37) (512z: 3789) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 065f27f41c..1d937591ab 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,77 +1,97 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-18_18:06:58 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:54:13 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.218326e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.576933e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.576933e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.755508e+02 +- 2.671054e+02 ) GeV^-2 -TOTAL : 0.526714 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,521,602,665 cycles:u # 2.785 GHz (76.58%) - 7,096,429 stalled-cycles-frontend:u # 0.47% frontend cycles idle (76.26%) - 261,488,952 stalled-cycles-backend:u # 17.19% backend cycles idle (75.53%) - 2,022,238,545 instructions:u # 1.33 insn per cycle - # 0.13 stalled cycles per insn (73.87%) - 0.575439595 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.452792e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.504415e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.504415e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 +TOTAL : 0.473960 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,972,315,192 cycles # 2.868 GHz + 2,911,549,142 instructions # 1.48 insn per cycle + 0.746422585 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.625160e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.546838e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.546838e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.855939e+03 +- 1.791987e+03 ) GeV^-2 -TOTAL : 1.140152 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,390,394,734 cycles:u # 2.958 GHz (75.04%) - 16,797,318 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.69%) - 842,692,687 stalled-cycles-backend:u # 24.86% backend cycles idle (74.65%) - 3,619,427,436 instructions:u # 1.07 insn per cycle - # 0.23 stalled cycles per insn (74.63%) - 1.202730605 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.537799e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.260766e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.260766e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.737499e+02 +- 4.776369e+02 ) GeV^-2 +TOTAL : 0.654007 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,561,684,908 cycles # 2.881 GHz + 3,893,804,940 instructions # 1.52 insn per cycle + 0.947747663 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -79,36 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 1.412410e+00 -Avg ME (F77/GPU) = 1.4131674300257941 -Relative difference = 0.0005362678158567296 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412607e+00 +Avg ME (F77/GPU) = 1.4132214305330990 +Relative difference = 0.0004349621183379836 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.657263e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.676339e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.676339e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 -TOTAL : 4.504604 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 15,586,193,933 cycles:u # 3.458 GHz (74.99%) - 2,459,790 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.98%) - 2,460,055,112 stalled-cycles-backend:u # 15.78% backend cycles idle (74.98%) - 56,619,213,385 instructions:u # 3.63 insn per cycle - # 0.04 stalled cycles per insn (74.98%) - 4.512308073 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1190) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.511557e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.524668e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.524668e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.546653 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 19,271,491,646 cycles # 2.942 GHz + 59,619,016,957 instructions # 3.09 insn per cycle + 6.550964309 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 959) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -116,36 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129859809517598 -Relative difference = 1.3480841507557613e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129949096991936 +Relative difference = 6.390737857384068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.133341e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.151657e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.151657e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 -TOTAL : 1.470448 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,086,506,407 cycles:u # 3.452 GHz (75.07%) - 1,843,913 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.03%) - 1,597,361,677 stalled-cycles-backend:u # 31.40% backend cycles idle (75.03%) - 16,294,987,883 instructions:u # 3.20 insn per cycle - # 0.10 stalled cycles per insn (75.03%) - 1.478241212 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5124) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.075407e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.222998e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.222998e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 2.052234 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been 
reported + 6,045,406,228 cycles # 2.940 GHz + 17,110,194,161 instructions # 2.83 insn per cycle + 2.056632379 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -153,36 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129857731430207 -Relative difference = 1.6055147002442227e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129954647353316 +Relative difference = 
3.2890090308261873e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.437539e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.521085e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.521085e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 -TOTAL : 0.696690 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,399,754,400 cycles:u # 3.430 GHz (74.86%) - 1,783,318 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.85%) - 734,442,519 stalled-cycles-backend:u # 30.60% backend cycles idle (74.85%) - 6,058,668,674 instructions:u # 2.52 insn per cycle - # 0.12 stalled cycles per insn (74.85%) - 0.704553472 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4734) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.741961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.804731e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.804731e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) 
GeV^-2 +TOTAL : 0.963728 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,665,943,840 cycles # 2.756 GHz + 6,224,556,067 instructions # 2.33 insn per cycle + 0.968076233 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -190,16 +203,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133162101620087 -Relative difference = 1.4870135814264702e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.889422e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.966151e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.966151e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.890681 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,436,025,235 cycles # 2.723 GHz + 5,827,123,635 instructions # 2.39 insn per cycle + 0.895318545 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4906) (512y: 37) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.443092e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.487537e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.487537e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.160657 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,101,025,117 cycles # 1.805 GHz + 3,433,428,500 instructions # 1.63 insn per cycle + 1.165027687 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 37) (512z: 3789) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index eabcf1b0aa..4251937b55 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-18_17:15:16 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:15:10 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.173106e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.633608e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.641451e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.415273e+04 +- 1.288237e+04 ) GeV^-2 -TOTAL : 0.367062 sec -INFO: No Floating Point Exceptions have been reported - 952,943,414 cycles:u # 2.503 GHz (75.96%) - 2,360,452 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.84%) - 11,754,320 stalled-cycles-backend:u # 1.23% backend cycles idle (75.56%) - 1,498,730,840 instructions:u # 1.57 insn per cycle - # 0.01 stalled cycles per insn (76.07%) - 0.538153724 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.677136e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.031870e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.066369e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) 
GeV^-2 +TOTAL : 0.462735 sec +INFO: No Floating Point Exceptions have been reported + 1,946,975,962 cycles # 2.877 GHz + 2,736,807,999 instructions # 1.41 insn per cycle + 0.735970844 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.857646e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.014449e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.016289e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.619625e+05 +- 1.611328e+05 ) GeV^-2 -TOTAL : 0.492332 sec -INFO: No Floating Point Exceptions have been reported - 1,311,451,654 cycles:u # 2.565 GHz (75.44%) - 2,488,966 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.80%) - 6,593,159 stalled-cycles-backend:u # 0.50% backend cycles idle (75.43%) - 1,831,245,848 instructions:u # 1.40 insn per cycle - # 0.00 stalled cycles per insn (74.99%) - 0.549050391 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) 
= ( 2.680283e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.366147e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.409371e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 +TOTAL : 0.508125 sec +INFO: No Floating Point Exceptions have been reported + 2,113,521,464 cycles # 2.866 GHz + 3,052,176,829 instructions # 1.44 insn per cycle + 0.794975331 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 1.412410e+00 -Avg ME (F77/GPU) = 1.4131674300257941 -Relative difference = 0.0005362678158567296 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412607e+00 +Avg ME (F77/GPU) = 1.4132214305330990 +Relative difference = 0.0004349621183379836 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.654108e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.672913e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.672913e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 -TOTAL : 4.506859 sec -INFO: No Floating Point Exceptions have been reported - 15,465,039,739 cycles:u # 3.429 GHz (75.00%) - 2,298,886 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) - 2,761,224,980 stalled-cycles-backend:u # 17.85% backend cycles idle (74.99%) - 56,323,382,399 instructions:u # 3.64 insn per cycle - # 0.05 stalled cycles per insn (74.99%) - 4.574466306 seconds 
time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1124) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.498167e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.510888e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.510888e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.577268 sec +INFO: No Floating Point Exceptions have been reported + 19,407,580,643 cycles # 2.949 GHz + 59,354,263,399 instructions # 3.06 insn per cycle + 6.581442326 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129859511640177 -Relative difference = 3.456225494743424e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129949096991936 +Relative difference = 6.390737857384068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.164495e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.184500e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.184500e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 -TOTAL : 1.429209 sec -INFO: No Floating Point Exceptions have been reported - 4,860,552,892 cycles:u # 3.392 GHz (75.24%) - 1,895,413 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.16%) - 1,475,677,075 stalled-cycles-backend:u # 30.36% backend cycles idle (74.95%) - 16,358,411,245 instructions:u # 3.37 insn per cycle - # 0.09 stalled cycles per insn (74.88%) - 1.541885001 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5045) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.398785e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.549497e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.549497e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 1.968439 sec +INFO: No Floating Point Exceptions have been reported + 5,775,824,576 cycles # 2.929 GHz + 16,849,685,670 instructions # 2.92 insn per cycle + 1.972573842 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5610) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129858306637857 -Relative difference = 1.1984281117008586e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129954647353316 +Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = 
SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.108598e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.172019e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.172019e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 -TOTAL : 0.799467 sec -INFO: No Floating Point Exceptions have been reported - 2,731,164,326 cycles:u # 3.399 GHz (75.00%) - 1,756,161 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.19%) - 823,276,493 stalled-cycles-backend:u # 30.14% backend cycles idle (75.11%) - 6,696,704,556 instructions:u # 2.45 insn per cycle - # 0.12 stalled cycles per insn (75.12%) - 0.894493118 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5386) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.527004e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.573961e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.573961e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.091009 sec +INFO: No Floating Point Exceptions have been reported + 3,021,095,483 cycles # 2.760 GHz + 6,848,870,145 instructions # 2.27 insn per cycle + 1.095189540 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5735) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133162101620087 -Relative difference = 1.4870135814264702e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.611080e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.664155e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.664155e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.035237 sec +INFO: No Floating Point Exceptions have been reported + 2,858,508,214 cycles # 2.752 GHz + 6,438,110,737 instructions # 2.25 insn per cycle + 1.039480125 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5509) (512y: 23) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.329594e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.366106e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.366106e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.252535 sec +INFO: No Floating Point Exceptions have 
been reported + 2,255,457,879 cycles # 1.796 GHz + 3,755,585,205 instructions # 1.67 insn per cycle + 1.256791945 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 28) (512z: 4084) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 575c62cc1a..09551986c9 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-18_17:15:28 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:15:31 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.435119e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.539348e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.541237e+06 ) 
sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.463804 sec -INFO: No Floating Point Exceptions have been reported - 1,178,530,093 cycles:u # 2.645 GHz (75.92%) - 2,606,684 stalled-cycles-frontend:u # 0.22% frontend cycles idle (76.53%) - 5,150,807 stalled-cycles-backend:u # 0.44% backend cycles idle (76.07%) - 1,656,880,144 instructions:u # 1.41 insn per cycle - # 0.00 stalled cycles per insn (74.17%) - 0.595192337 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.594581e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.871606e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.970951e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.473359 sec +INFO: No Floating Point Exceptions have been reported + 1,992,272,995 cycles # 2.881 GHz + 2,873,441,271 instructions # 1.44 insn per cycle + 0.748198068 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.600834e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.726711e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.729242e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.725011 sec -INFO: No Floating Point Exceptions have been reported - 1,977,744,096 cycles:u # 2.638 GHz (75.57%) - 2,415,379 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.38%) - 6,005,818 stalled-cycles-backend:u # 0.30% backend cycles idle (75.23%) - 2,514,772,251 instructions:u # 1.27 insn per cycle - # 0.00 stalled cycles per insn (74.24%) - 0.791388927 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.037058e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.222746e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.233058e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.613274 sec +INFO: No Floating Point Exceptions have been reported + 2,464,972,790 cycles # 2.891 GHz + 3,748,511,486 instructions # 1.52 insn per cycle + 0.912099116 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569483 -Relative difference = 4.4188898885662695e-07 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.365200e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.380510e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.380510e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 4.893360 sec -INFO: No Floating Point Exceptions have been reported - 17,103,424,613 cycles:u # 3.492 GHz (75.01%) - 2,386,149 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 3,979,466,539 stalled-cycles-backend:u # 23.27% backend cycles idle (75.01%) - 57,719,685,111 instructions:u # 3.37 insn per cycle - # 0.07 stalled cycles per insn (75.01%) - 4.961790583 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1219) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.399672e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.411896e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.411896e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.848810 sec +INFO: No Floating Point Exceptions have been reported + 20,197,037,339 cycles # 2.948 GHz + 60,947,415,438 instructions # 3.02 insn per cycle + 6.853052511 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 1220) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 
10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.397129e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.453309e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.453309e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.585749 sec -INFO: No Floating Point Exceptions have been reported - 8,951,817,863 cycles:u # 3.457 GHz (74.98%) - 2,245,079 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.98%) - 2,261,108,188 stalled-cycles-backend:u # 25.26% backend cycles idle (74.98%) - 29,715,594,187 instructions:u # 3.32 insn per cycle - # 0.08 stalled cycles per insn (74.98%) - 2.706615239 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4755) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.642526e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.687090e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.687090e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.549259 sec +INFO: No Floating Point Exceptions have been reported + 10,477,481,501 cycles # 2.949 GHz + 30,820,930,825 instructions # 2.94 insn per cycle + 3.553666211 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5351) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= 
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.330087e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.354301e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.354301e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.256022 sec -INFO: No Floating Point Exceptions have been reported - 4,374,586,886 cycles:u # 3.473 GHz (74.72%) - 2,021,531 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.70%) - 1,211,058,173 stalled-cycles-backend:u # 27.68% backend cycles idle (75.02%) - 11,045,449,646 instructions:u # 2.52 insn per cycle - # 0.11 stalled cycles per insn (75.23%) - 1.312603944 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4405) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.196444e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.370621e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.370621e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.802004 sec +INFO: No Floating Point Exceptions have been reported + 4,965,652,288 cycles # 2.750 GHz + 11,359,248,854 instructions # 2.29 insn per cycle + 1.806342805 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.041756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.063436e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.063436e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.592695 sec +INFO: No Floating Point Exceptions have been reported + 4,382,366,442 cycles # 2.746 GHz + 10,608,797,295 instructions # 2.42 insn per cycle + 1.596978533 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4503) (512y: 84) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213600217192 +Relative difference = 4.5288254008796884e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.957560e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.055998e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.055998e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.377375 sec +INFO: No Floating Point Exceptions have been 
reported + 4,237,877,454 cycles # 1.780 GHz + 6,168,521,326 instructions # 1.46 insn per cycle + 2.381770690 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2143) (512y: 116) (512z: 3653) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213786174055 +Relative difference = 4.3972324717191576e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 77b409ec9c..e31dab3bcb 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-18_17:15:42 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:15:57 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.444533e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.547416e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.549334e+06 ) 
sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.436104 sec -INFO: No Floating Point Exceptions have been reported - 1,205,766,715 cycles:u # 2.767 GHz (75.51%) - 2,657,595 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.40%) - 5,124,684 stalled-cycles-backend:u # 0.43% backend cycles idle (76.16%) - 1,646,974,323 instructions:u # 1.37 insn per cycle - # 0.00 stalled cycles per insn (76.43%) - 0.600457511 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.665772e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.933205e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.041936e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.474243 sec +INFO: No Floating Point Exceptions have been reported + 1,991,405,654 cycles # 2.879 GHz + 2,864,466,394 instructions # 1.44 insn per cycle + 0.749070557 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.623679e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.750011e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.752595e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.708523 sec -INFO: No Floating Point Exceptions have been reported - 1,983,766,237 cycles:u # 2.710 GHz (75.43%) - 2,538,452 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.45%) - 7,166,285 stalled-cycles-backend:u # 0.36% backend cycles idle (75.98%) - 2,423,186,703 instructions:u # 1.22 insn per cycle - # 0.00 stalled cycles per insn (75.47%) - 0.771600202 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.042210e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.228789e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.239192e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.610923 sec +INFO: No Floating Point Exceptions have been reported + 2,452,672,007 cycles # 2.882 GHz + 3,739,836,978 instructions # 1.52 insn per cycle + 0.910769372 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569483 -Relative difference = 4.4188898885662695e-07 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.400236e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.416076e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.416076e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 4.843297 sec -INFO: No Floating Point Exceptions have been reported - 16,613,709,794 cycles:u # 3.428 GHz (74.93%) - 2,837,093 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.98%) - 3,460,595,569 stalled-cycles-backend:u # 20.83% backend cycles idle (75.05%) - 57,390,447,744 instructions:u # 3.45 insn per cycle - # 0.06 stalled cycles per insn (75.08%) - 4.926799024 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 866) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.379932e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.391571e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.391571e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.905036 sec +INFO: No Floating Point Exceptions have been reported + 20,270,175,803 cycles # 2.935 GHz + 61,175,514,110 instructions # 3.02 insn per cycle + 6.909213331 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 1272) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 
10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.398577e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.454629e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.454629e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.585324 sec -INFO: No Floating Point Exceptions have been reported - 8,774,135,894 cycles:u # 3.389 GHz (75.14%) - 1,487,872 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.03%) - 1,834,490,066 stalled-cycles-backend:u # 20.91% backend cycles idle (74.97%) - 30,092,791,523 instructions:u # 3.43 insn per cycle - # 0.06 stalled cycles per insn (74.97%) - 2.675244899 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4834) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.712701e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.757964e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.757964e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.496353 sec +INFO: No Floating Point Exceptions have been reported + 10,330,450,764 cycles # 2.952 GHz + 30,532,965,755 instructions # 2.96 insn per cycle + 3.500721812 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5155) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= 
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.213312e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.234495e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.234495e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.375018 sec -INFO: No Floating Point Exceptions have been reported - 4,652,993,936 cycles:u # 3.375 GHz (75.05%) - 2,302,267 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.05%) - 1,496,265,905 stalled-cycles-backend:u # 32.16% backend cycles idle (75.05%) - 11,669,982,634 instructions:u # 2.51 insn per cycle - # 0.13 stalled cycles per insn (75.05%) - 1.444304956 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4625) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.873461e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.031366e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.031366e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.866310 sec +INFO: No Floating Point Exceptions have been reported + 5,149,448,063 cycles # 2.754 GHz + 11,872,714,422 instructions # 2.31 insn per cycle + 1.870704205 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4887) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.721277e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.910902e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.910902e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.705742 sec +INFO: No Floating Point Exceptions have been reported + 4,682,307,882 cycles # 2.740 GHz + 11,166,992,215 instructions # 2.38 insn per cycle + 1.710031590 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4508) (512y: 239) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213600217192 +Relative difference = 4.5288254008796884e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.916313e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.015099e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.015099e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.391177 sec +INFO: No Floating Point Exceptions have been 
reported + 4,255,173,095 cycles # 1.777 GHz + 6,409,630,981 instructions # 1.51 insn per cycle + 2.395610797 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2039) (512y: 162) (512z: 3731) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213786174055 +Relative difference = 4.3972324717191576e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 59b911f1d8..e60a3b56f2 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_17:15:56 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:16:23 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.204596e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.259417e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.259568e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.715128 sec -INFO: No Floating Point Exceptions have been reported - 1,635,437,665 cycles:u # 2.817 GHz (74.94%) - 2,495,684 stalled-cycles-frontend:u # 0.15% frontend cycles idle (76.17%) - 7,104,110 stalled-cycles-backend:u # 0.43% backend cycles idle (76.53%) - 2,096,604,985 instructions:u # 1.28 insn per cycle - # 0.00 stalled cycles per insn (74.05%) - 0.853269091 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.313288e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.338946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.340893e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.535030 sec +INFO: No Floating Point Exceptions have been reported + 2,203,915,832 cycles # 2.863 GHz + 3,411,363,725 instructions # 1.55 insn per cycle + 0.826937803 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.693369e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.698924e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.699024e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.646807 sec -INFO: No Floating Point Exceptions have been reported - 22,366,056,530 cycles:u # 3.352 GHz (75.06%) - 3,203,682 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.10%) - 7,189,313 stalled-cycles-backend:u # 0.03% backend cycles idle (74.99%) - 19,993,459,196 instructions:u # 0.89 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 6.710008304 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.139082e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.168902e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.170140e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.048810 sec +INFO: No Floating Point Exceptions have been reported + 9,673,114,822 cycles # 2.925 GHz + 22,022,328,349 instructions # 2.28 insn per cycle + 3.363974995 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.649906e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.651120e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.651120e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.205871 sec -INFO: No Floating Point Exceptions have been reported - 21,471,105,477 cycles:u # 3.462 GHz (75.04%) - 1,097,352 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) - 3,029,846,667 stalled-cycles-backend:u # 14.11% backend cycles idle (74.98%) - 78,182,978,747 instructions:u # 3.64 insn per cycle - # 0.04 stalled cycles per insn (74.99%) - 6.331276745 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4744) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.884766e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.885678e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.885678e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.707888 sec +INFO: No Floating Point Exceptions have been reported + 25,646,480,577 cycles # 2.944 GHz + 78,959,199,970 instructions # 3.08 insn per cycle + 8.712344144 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.442187e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.447228e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.447228e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.031168 sec -INFO: No Floating Point Exceptions have been reported - 10,507,284,560 cycles:u # 3.471 GHz (74.91%) - 411,446 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.91%) - 1,338,802,016 stalled-cycles-backend:u # 12.74% backend cycles idle (74.94%) - 39,389,673,104 instructions:u # 3.75 insn per cycle - # 0.03 stalled cycles per insn (75.05%) - 3.139609397 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.525938e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.529103e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.529103e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.657640 sec +INFO: No Floating Point Exceptions have been reported + 13,102,337,051 cycles # 2.811 GHz + 39,559,050,978 instructions # 3.02 insn per cycle + 4.662071177 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13192) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.218352e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.220871e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.220871e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.362754 sec -INFO: No Floating Point Exceptions have been reported - 4,685,851,873 cycles:u # 3.447 GHz (74.87%) - 2,379,754 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.73%) - 424,035,996 stalled-cycles-backend:u # 9.05% backend cycles idle (74.81%) - 13,809,013,925 instructions:u # 2.95 insn per cycle - # 0.03 stalled cycles per insn (75.11%) - 1.473377245 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.037518e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.054750e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.054750e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.047044 sec +INFO: No Floating Point Exceptions have been reported + 5,613,016,028 cycles # 2.737 GHz + 13,823,575,120 instructions # 2.46 insn per cycle + 2.051472192 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.172996e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.194283e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.194283e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.794478 sec +INFO: No Floating Point Exceptions have been reported + 4,922,583,154 cycles # 2.738 GHz + 12,506,595,932 instructions # 2.54 insn per cycle + 1.798855063 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.987584e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.999990e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.999990e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.353771 sec +INFO: No Floating 
Point Exceptions have been reported + 4,138,447,690 cycles # 1.756 GHz + 6,393,230,519 instructions # 1.54 insn per cycle + 2.358130141 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index f24b7b8ec2..40b573a43c 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,77 +1,97 @@ 
-Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_18:07:17 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:55:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.220153e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.258909e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.258909e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.567109 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,653,191,234 cycles:u # 2.832 GHz (75.46%) - 3,284,095 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.64%) - 49,343,637 stalled-cycles-backend:u # 2.98% backend cycles idle (75.12%) - 2,124,460,499 instructions:u # 1.29 insn per cycle - # 0.02 stalled cycles per insn (74.14%) - 0.620909696 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.976623e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.275789e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.275789e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.524913 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,187,535,830 cycles # 2.870 GHz + 3,393,578,749 instructions # 1.55 insn per cycle + 0.821118226 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.633929e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.694266e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.694266e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 7.474935 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 25,259,898,764 cycles:u # 3.364 GHz (74.99%) - 38,628,706 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.05%) - 1,093,566,156 stalled-cycles-backend:u # 4.33% backend cycles idle (75.04%) - 22,393,412,318 instructions:u # 0.89 insn per cycle - # 0.05 stalled cycles per insn (75.02%) - 7.543274639 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.647884e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.131075e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.131075e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.300824 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,456,540,936 cycles # 2.924 GHz + 23,609,445,897 instructions # 2.26 insn per cycle + 3.631983066 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -79,36 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.654658e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.655890e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.655890e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.189524 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 21,480,768,891 cycles:u # 3.469 GHz (74.94%) - 998,981 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.94%) - 2,950,116,993 stalled-cycles-backend:u # 13.73% backend cycles idle (74.95%) - 78,052,279,312 instructions:u # 3.63 insn per cycle - # 0.04 stalled cycles per insn (75.02%) - 6.198416917 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4744) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.884487e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.885391e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.885391e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.714099 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 25,667,183,320 cycles # 2.944 GHz + 78,962,641,614 instructions # 3.08 insn per cycle + 8.718743024 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -116,36 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge 
OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.336132e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.341069e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.341069e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.085441 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,494,758,685 cycles:u # 3.398 GHz (74.88%) - 437,035 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.90%) - 1,336,910,000 stalled-cycles-backend:u # 12.74% backend cycles idle (75.01%) - 39,380,034,552 instructions:u # 3.75 insn per cycle - # 0.03 stalled cycles per insn (75.12%) - 3.093820955 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.526771e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.530011e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.530011e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.660484 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 13,111,732,509 cycles # 2.811 GHz + 39,572,349,146 instructions # 3.02 insn per cycle + 4.665178116 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -153,36 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.179996e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.182496e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.182496e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.401929 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,719,141,237 cycles:u # 3.358 GHz (74.96%) - 362,701 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.96%) - 548,949,913 stalled-cycles-backend:u # 11.63% backend cycles idle (74.96%) - 13,799,660,671 instructions:u # 2.92 insn per cycle - # 0.04 stalled cycles per insn (74.96%) - 1.432266244 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.088722e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.106050e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.106050e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.038539 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been 
reported + 5,622,073,957 cycles # 2.753 GHz + 13,834,285,866 instructions # 2.46 insn per cycle + 2.043264664 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -190,16 +203,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 
2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.180474e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.202665e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.202665e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.797187 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,937,816,969 cycles # 2.742 GHz + 12,516,988,109 instructions # 2.53 insn per cycle + 1.801993078 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.979563e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.992901e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.992901e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.360775 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,161,078,836 cycles # 1.760 GHz + 6,405,054,232 instructions # 1.54 insn per cycle + 2.365459011 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index d96a0f8e8f..14d3e456fd 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_18:12:07 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:06:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.200527e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.259546e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.259705e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.295730e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.322229e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.324249e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.562989 sec -INFO: No Floating Point Exceptions have been reported - 
1,658,227,220 cycles:u # 2.870 GHz (75.30%) - 3,326,352 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.44%) - 46,729,112 stalled-cycles-backend:u # 2.82% backend cycles idle (74.98%) - 2,064,261,741 instructions:u # 1.24 insn per cycle - # 0.02 stalled cycles per insn (74.86%) - 0.612342330 seconds time elapsed +TOTAL : 0.520677 sec +INFO: No Floating Point Exceptions have been reported + 2,148,428,756 cycles # 2.846 GHz + 3,383,382,873 instructions # 1.57 insn per cycle + 0.814124974 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.693965e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.700029e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.700131e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.133518e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.163906e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = 
( 4.165159e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 7.331623 sec -INFO: No Floating Point Exceptions have been reported - 24,894,559,524 cycles:u # 3.383 GHz (74.97%) - 28,356,340 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.04%) - 1,090,496,615 stalled-cycles-backend:u # 4.38% backend cycles idle (75.06%) - 21,642,040,039 instructions:u # 0.87 insn per cycle - # 0.05 stalled cycles per insn (74.98%) - 7.389428416 seconds time elapsed +TOTAL : 3.142841 sec +INFO: No Floating Point Exceptions have been reported + 9,913,272,493 cycles # 2.915 GHz + 21,406,834,972 instructions # 2.16 insn per cycle + 3.457413936 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.624062e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.625295e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.625295e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.883085e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.883982e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.883982e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.258293 sec -INFO: No Floating Point Exceptions have been reported - 21,742,847,303 cycles:u # 3.473 GHz (74.95%) - 1,362,438 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.95%) - 3,135,647,236 stalled-cycles-backend:u # 14.42% backend cycles idle (74.96%) - 78,097,122,846 instructions:u # 3.59 insn per cycle - # 0.04 stalled cycles per insn (75.00%) - 6.262435029 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4744) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.717475 sec +INFO: No Floating Point Exceptions have been reported + 25,650,608,073 cycles # 2.942 GHz + 78,955,783,568 instructions # 3.08 insn per cycle + 8.721774217 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.436883e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.442297e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.442297e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.525568e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.528763e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.528763e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.025203 sec -INFO: No Floating Point Exceptions have been reported - 10,491,611,008 cycles:u # 3.466 GHz (74.90%) - 420,708 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.90%) - 1,336,455,062 stalled-cycles-backend:u # 12.74% backend cycles idle (74.90%) - 39,419,328,691 instructions:u # 3.76 insn per cycle - # 0.03 stalled cycles per insn (74.92%) - 3.029267840 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.659766 sec +INFO: No Floating Point Exceptions have been reported + 13,093,152,498 cycles # 2.808 GHz + 39,558,598,891 instructions # 3.02 insn per cycle + 4.664046020 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = 
DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.213233e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.215919e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.215919e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.044580e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.060991e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.060991e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.360185 sec -INFO: No Floating Point Exceptions have been reported - 4,710,598,250 cycles:u # 3.458 GHz (74.76%) - 311,599 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.75%) - 547,714,218 stalled-cycles-backend:u # 11.63% backend cycles idle (74.66%) - 13,803,559,147 instructions:u # 2.93 insn per cycle - # 0.04 stalled cycles per insn (74.96%) - 1.364167546 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) +TOTAL : 2.046898 sec +INFO: No Floating Point Exceptions have been reported + 5,615,094,940 cycles # 2.739 GHz + 13,822,846,005 instructions # 2.46 insn per cycle + 2.051140101 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.166765e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.187474e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.187474e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.797318 sec +INFO: No Floating Point Exceptions have been reported + 4,920,345,742 cycles # 2.732 GHz + 12,503,437,535 instructions # 2.54 insn per cycle + 1.801597465 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.991343e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.003364e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.003364e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.354011 sec +INFO: No 
Floating Point Exceptions have been reported + 4,141,327,319 cycles # 1.757 GHz + 6,390,315,143 instructions # 1.54 insn per cycle + 2.358468154 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 13c360ffa0..bbefe2a8e4 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,69 
+1,86 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_18:10:18 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:00:21 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.223746e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.259235e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.259386e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.560758 sec -INFO: No Floating Point Exceptions have been reported - 1,658,584,653 cycles:u # 2.860 GHz (75.27%) - 3,387,727 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.88%) - 47,445,269 stalled-cycles-backend:u # 2.86% backend cycles idle (75.08%) - 2,123,150,894 instructions:u # 1.28 insn per cycle - # 0.02 stalled cycles per insn (74.34%) - 0.604288531 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.061167e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.349241e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.351100e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.520345 sec +INFO: No Floating Point Exceptions have been reported + 2,166,689,738 cycles # 2.876 GHz + 3,445,065,863 instructions # 1.59 insn per cycle + 0.812996601 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.643561e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.699482e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.699583e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 7.437252 sec -INFO: No Floating Point Exceptions have been reported - 25,116,954,361 cycles:u # 3.371 GHz (74.99%) - 38,717,522 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.03%) - 1,093,087,629 stalled-cycles-backend:u # 4.35% backend cycles idle (75.05%) - 22,307,973,674 instructions:u # 0.89 insn per cycle - # 0.05 stalled cycles per insn (74.94%) - 7.495432364 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.727515e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
4.166918e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168161e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.213299 sec +INFO: No Floating Point Exceptions have been reported + 10,170,057,078 cycles # 2.920 GHz + 23,084,015,508 instructions # 2.27 insn per cycle + 3.538645884 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -71,34 +88,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 
-Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.662302e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.663534e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.663534e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.168443 sec -INFO: No Floating Point Exceptions have been reported - 21,434,527,823 cycles:u # 3.474 GHz (74.98%) - 808,257 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.98%) - 2,951,820,906 stalled-cycles-backend:u # 13.77% backend cycles idle (74.98%) - 78,020,642,412 instructions:u # 3.64 insn per cycle - # 0.04 stalled cycles per insn (74.98%) - 6.172663607 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4744) (avx2: 0) (512y: 0) (512z: 0) 
+EvtsPerSec[Rmb+ME] (23) = ( 1.884169e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.885066e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.885066e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.710738 sec +INFO: No Floating Point Exceptions have been reported + 25,636,302,572 cycles # 2.942 GHz + 78,955,597,829 instructions # 3.08 insn per cycle + 8.714991120 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -106,34 +122,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.443660e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.449374e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.449374e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.020825 sec -INFO: No Floating Point Exceptions have been reported - 10,478,664,066 cycles:u # 3.467 GHz (74.86%) - 410,699 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.86%) - 1,332,399,511 stalled-cycles-backend:u # 12.72% backend cycles idle (74.91%) - 39,427,721,367 instructions:u # 3.76 insn per cycle - # 0.03 stalled cycles per insn (75.04%) - 3.024874706 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.516471e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = 
( 3.519747e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.519747e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.670214 sec +INFO: No Floating Point Exceptions have been reported + 13,077,998,657 cycles # 2.798 GHz + 39,560,581,640 instructions # 3.02 insn per cycle + 4.674479243 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -141,34 +154,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 
2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.216044e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.218547e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.218547e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.357082 sec -INFO: No Floating Point Exceptions have been reported - 4,705,478,462 cycles:u # 3.462 GHz (74.70%) - 312,720 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.70%) - 548,334,891 stalled-cycles-backend:u # 11.65% backend cycles idle (74.85%) - 13,827,779,889 instructions:u # 2.94 insn per cycle - # 0.04 stalled cycles per insn (75.14%) - 1.361232418 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.487681e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.501903e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.501903e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) 
GeV^-4 +TOTAL : 2.196522 sec +INFO: No Floating Point Exceptions have been reported + 6,031,690,352 cycles # 2.742 GHz + 13,823,991,565 instructions # 2.29 insn per cycle + 2.200855114 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,16 +186,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME 
(F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.160431e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.182355e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.182355e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.796714 sec +INFO: No Floating Point Exceptions have been reported + 4,915,575,489 cycles # 2.731 GHz + 12,505,831,482 instructions # 2.54 insn per cycle + 1.801025403 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.864510e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.876687e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.876687e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.395867 sec +INFO: No 
Floating Point Exceptions have been reported + 4,162,633,573 cycles # 1.735 GHz + 6,392,322,352 instructions # 1.54 insn per cycle + 2.400290914 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index fa3d209f08..724af1477d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_17:16:21 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:16:57 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.220747e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.276283e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.276432e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.633489 sec -INFO: No Floating Point Exceptions have been reported - 1,598,196,734 cycles:u # 2.869 GHz (76.05%) - 2,588,604 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.88%) - 6,239,197 stalled-cycles-backend:u # 0.39% backend cycles idle (75.30%) - 2,001,756,680 instructions:u # 1.25 insn per cycle - # 0.00 stalled cycles per insn (75.71%) - 0.793732399 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.313099e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.338786e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.340713e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.536981 sec +INFO: No Floating Point Exceptions have been reported + 2,207,441,775 cycles # 2.862 GHz + 3,435,949,472 instructions # 1.56 insn per cycle + 0.828882621 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.692168e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.697598e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.697698e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.487899 sec -INFO: No Floating Point Exceptions have been reported - 21,985,727,628 cycles:u # 3.384 GHz (75.00%) - 3,266,647 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.02%) - 7,047,220 stalled-cycles-backend:u # 0.03% backend cycles idle (75.06%) - 19,652,848,907 instructions:u # 0.89 insn per cycle - # 0.00 stalled cycles per insn (74.94%) - 6.554364423 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.143532e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.173218e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.174465e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.036857 sec +INFO: No Floating Point Exceptions have been reported + 9,590,647,679 cycles # 2.910 GHz + 22,042,753,111 instructions # 2.30 insn per cycle + 3.351786655 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.643473e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.644776e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.644776e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.220674 sec -INFO: No Floating Point Exceptions have been reported - 21,581,680,162 cycles:u # 3.472 GHz (74.93%) - 1,008,607 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.92%) - 2,799,185,193 stalled-cycles-backend:u # 12.97% backend cycles idle (75.01%) - 78,073,639,400 instructions:u # 3.62 insn per cycle - # 0.04 stalled cycles per insn (75.04%) - 6.303141919 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4695) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.884971e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.885850e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.885850e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.706995 sec +INFO: No Floating Point Exceptions have been reported + 25,617,517,247 cycles # 2.941 GHz + 78,701,000,615 instructions # 3.07 insn per cycle + 8.711338338 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 4191) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.420836e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.426085e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.426085e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.042422 sec -INFO: No Floating Point Exceptions have been reported - 10,517,293,190 cycles:u # 3.462 GHz (75.02%) - 809,943 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 1,446,124,865 stalled-cycles-backend:u # 13.75% backend cycles idle (75.01%) - 39,401,088,959 instructions:u # 3.75 insn per cycle - # 0.04 stalled cycles per insn (75.00%) - 3.114756938 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:11940) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.566075e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.569356e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.569356e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.605078 sec +INFO: No Floating Point Exceptions have been reported + 13,036,001,618 cycles # 2.829 GHz + 39,449,493,817 instructions # 3.03 insn per cycle + 4.609408106 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12966) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.229527e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.232471e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.232471e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.359120 sec -INFO: No Floating Point Exceptions have been reported - 4,645,902,790 cycles:u # 3.449 GHz (74.98%) - 572,318 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.06%) - 429,391,951 stalled-cycles-backend:u # 9.24% backend cycles idle (75.09%) - 13,818,574,096 instructions:u # 2.97 insn per cycle - # 0.03 stalled cycles per insn (75.09%) - 1.452466793 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10220) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.966836e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.982546e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.982546e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.064889 sec +INFO: No Floating Point Exceptions have been reported + 5,676,808,859 cycles # 2.745 GHz + 13,911,294,100 instructions # 2.45 insn per cycle + 2.069253381 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11582) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.081065e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.102389e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.102389e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.812330 sec +INFO: No Floating Point Exceptions have been reported + 4,986,765,093 cycles # 2.746 GHz + 12,602,417,777 instructions # 2.53 insn per cycle + 1.816710814 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10423) (512y: 241) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.944688e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.956851e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.956851e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.368181 sec +INFO: No Floating 
Point Exceptions have been reported + 4,157,079,693 cycles # 1.753 GHz + 6,500,343,598 instructions # 1.56 insn per cycle + 2.372472342 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1754) (512y: 193) (512z: 9382) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 963c30ad93..9c62ee596f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_17:57:30 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:45:20 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.209030e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.267353e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.267500e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.561504 sec -INFO: No Floating Point Exceptions have been reported - 1,650,301,145 cycles:u # 2.885 GHz (75.14%) - 2,510,405 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.07%) - 5,346,111 stalled-cycles-backend:u # 0.32% backend cycles idle (75.69%) - 2,016,627,541 instructions:u # 1.22 insn per cycle - # 0.00 stalled cycles per insn (75.39%) - 0.613700386 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.107911e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.129674e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.131105e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.542394 sec +INFO: No Floating Point Exceptions have been reported + 2,239,231,771 cycles # 2.882 GHz + 3,498,325,403 instructions # 1.56 insn per cycle + 0.833980513 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.692966e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.698428e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.698530e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.642295 sec -INFO: No Floating Point Exceptions have been reported - 22,284,819,763 cycles:u # 3.342 GHz (75.02%) - 3,323,965 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.08%) - 5,858,249 stalled-cycles-backend:u # 0.03% backend cycles idle (75.06%) - 19,822,694,807 instructions:u # 0.89 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 6.709793750 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.758554e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.783710e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.784685e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.309606 sec +INFO: No Floating Point Exceptions have been reported + 10,428,358,714 cycles # 2.922 GHz + 23,876,781,455 instructions # 2.29 insn per cycle + 3.623869439 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.517393e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.517775e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.517775e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 36.315090 sec -INFO: No Floating Point Exceptions have been reported - 125,097,558,299 cycles:u # 3.445 GHz (75.01%) - 66,311,042 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.00%) - 9,731,272,148 stalled-cycles-backend:u # 7.78% backend cycles idle (75.00%) - 141,068,944,658 instructions:u # 1.13 insn per cycle - # 0.07 stalled cycles per insn (75.00%) - 36.322945141 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21379) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.278929e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.279396e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.279396e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 38.335228 sec +INFO: No Floating Point Exceptions have been reported + 112,569,296,340 cycles # 2.936 GHz + 144,793,904,773 instructions # 1.29 insn per cycle + 38.339626690 seconds time 
elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21273) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140461E-004 Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.494754e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.496840e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.496840e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.701163 sec -INFO: No Floating Point Exceptions have been reported - 16,292,346,635 cycles:u # 3.464 GHz (75.00%) - 854,652 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.00%) - 7,155,618,960 stalled-cycles-backend:u # 43.92% backend cycles idle (75.00%) - 37,489,495,775 instructions:u # 2.30 insn per cycle - # 0.19 stalled cycles per insn (75.00%) - 4.708988517 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68150) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.146613e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.149188e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.149188e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.218296 sec +INFO: No Floating Point Exceptions have been reported + 14,745,365,482 cycles # 2.824 GHz + 37,604,718,701 instructions # 2.55 insn per cycle + 5.222619147 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68172) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141220E-004 -Relative difference = 2.837299064562788e-07 +Avg ME (F77/C++) = 6.6266731198141209E-004 +Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.005875e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.014158e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.014158e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.349048 sec -INFO: No Floating Point Exceptions have been reported - 8,128,107,621 cycles:u # 3.456 GHz (74.86%) - 180,109,446 stalled-cycles-frontend:u # 2.22% frontend cycles idle (74.83%) - 4,282,191,528 stalled-cycles-backend:u # 52.68% backend cycles idle (74.86%) - 12,905,149,783 instructions:u # 1.59 insn per cycle - # 0.33 stalled cycles per insn (75.01%) - 2.396249654 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46482) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.373915e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.387237e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.387237e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.230777 sec +INFO: No Floating Point Exceptions have been reported + 6,114,551,945 cycles # 2.737 GHz + 13,052,964,850 instructions # 2.13 insn per cycle + 2.235150749 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2:46946) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156778E-004 -Relative difference = 2.837296716733571e-07 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.869797e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.889489e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.889489e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.855437 sec +INFO: No Floating Point Exceptions have been reported + 5,079,069,827 cycles # 2.732 GHz + 11,450,297,808 instructions # 2.25 insn per cycle + 1.859852844 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40486) (512y: 285) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.334322e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.348410e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.348410e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.242689 sec +INFO: No Floating 
Point Exceptions have been reported + 3,955,754,497 cycles # 1.761 GHz + 5,927,045,148 instructions # 1.50 insn per cycle + 2.247181135 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2444) (512y: 337) (512z:39338) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index f8b4184335..af0b172ab7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_17:58:30 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:46:29 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.222842e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.277911e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.278067e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.541565 sec -INFO: No Floating Point Exceptions have been reported - 1,576,107,912 cycles:u # 2.847 GHz (74.89%) - 2,546,010 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.23%) - 5,315,341 stalled-cycles-backend:u # 0.34% backend cycles idle (75.01%) - 2,015,562,862 instructions:u # 1.28 insn per cycle - # 0.00 stalled cycles per insn (74.53%) - 0.594897242 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.101802e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.121265e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.122962e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.540843 sec +INFO: No Floating Point Exceptions have been reported + 2,223,817,024 cycles # 2.871 GHz + 3,385,583,234 instructions # 1.52 insn per cycle + 0.831231377 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.691657e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.697063e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.697163e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.462346 sec -INFO: No Floating Point Exceptions have been reported - 21,988,430,194 cycles:u # 3.388 GHz (74.92%) - 3,160,782 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) - 7,539,204 stalled-cycles-backend:u # 0.03% backend cycles idle (75.10%) - 19,611,940,250 instructions:u # 0.89 insn per cycle - # 0.00 stalled cycles per insn (75.11%) - 6.531548390 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.740756e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765684e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.766660e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.304935 sec +INFO: No Floating Point Exceptions have been reported + 10,396,192,831 cycles # 2.917 GHz + 23,795,713,123 instructions # 2.29 insn per cycle + 3.619511438 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.598266e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.598631e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.598631e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 35.673351 sec -INFO: No Floating Point Exceptions have been reported - 123,630,854,288 cycles:u # 3.466 GHz (75.00%) - 18,173,436 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.00%) - 11,350,077,369 stalled-cycles-backend:u # 9.18% backend cycles idle (75.00%) - 141,030,575,887 instructions:u # 1.14 insn per cycle - # 0.08 stalled cycles per insn (75.00%) - 35.681240408 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21174) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.220488e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.220945e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.220945e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 38.865543 sec +INFO: No Floating Point Exceptions have been reported + 114,075,746,984 cycles # 2.935 GHz + 144,284,837,728 instructions # 1.26 insn per cycle + 38.869913276 seconds time 
elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21024) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198140482E-004 -Relative difference = 2.8372991758188064e-07 +Avg ME (F77/C++) = 6.6266731198140450E-004 +Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.350552e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.352439e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.352439e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.902764 sec -INFO: No Floating Point Exceptions have been reported - 16,620,667,301 cycles:u # 3.388 GHz (74.95%) - 14,696,809 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.90%) - 6,330,044,323 stalled-cycles-backend:u # 38.09% backend cycles idle (74.95%) - 37,574,901,227 instructions:u # 2.26 insn per cycle - # 0.17 stalled cycles per insn (75.03%) - 4.910500106 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68049) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.002635e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.004951e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.004951e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.467988 sec +INFO: No Floating Point Exceptions have been reported + 15,296,909,197 cycles # 2.796 GHz + 37,837,176,497 instructions # 2.47 insn per cycle + 5.472337784 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:68594) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141220E-004 -Relative difference = 2.837299064562788e-07 +Avg ME (F77/C++) = 6.6266731198141209E-004 +Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.656480e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.666413e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.666413e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.150332 sec -INFO: No Floating Point Exceptions have been reported - 7,419,558,099 cycles:u # 3.446 GHz (74.80%) - 348,936 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.99%) - 4,274,680,056 stalled-cycles-backend:u # 57.61% backend cycles idle (75.11%) - 12,764,120,988 instructions:u # 1.72 insn per cycle - # 0.33 stalled cycles per insn (75.11%) - 2.158658646 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:45597) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.512966e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.527080e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.527080e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.189426 sec +INFO: No Floating Point Exceptions have been reported + 6,002,714,707 cycles # 2.737 GHz + 12,921,820,063 instructions # 2.15 insn per cycle + 2.193921042 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2:46048) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156778E-004 -Relative difference = 2.837296716733571e-07 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.859047e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.878725e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.878725e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.857736 sec +INFO: No Floating Point Exceptions have been reported + 5,096,589,479 cycles # 2.738 GHz + 11,450,886,914 instructions # 2.25 insn per cycle + 1.862161811 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40151) (512y: 219) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.316370e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.329769e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.329769e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.247849 sec +INFO: No Floating 
Point Exceptions have been reported + 3,953,949,727 cycles # 1.756 GHz + 5,894,038,279 instructions # 1.49 insn per cycle + 2.252346875 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1959) (512y: 259) (512z:38977) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 35cdb26a8c..90e270bc8d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_17:16:45 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:17:32 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.892908e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.027098e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.027391e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 -TOTAL : 0.491369 sec -INFO: No Floating Point Exceptions have been reported - 1,374,101,883 cycles:u # 2.727 GHz (76.14%) - 2,546,959 stalled-cycles-frontend:u # 0.19% frontend cycles idle (76.25%) - 6,128,063 stalled-cycles-backend:u # 0.45% backend cycles idle (75.26%) - 1,919,788,005 instructions:u # 1.40 insn per cycle - # 0.00 stalled cycles per insn (75.43%) - 0.680011428 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.485010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.524901e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.528717e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.493784 sec +INFO: No Floating Point Exceptions have been reported + 2,047,150,406 cycles # 2.872 GHz + 3,017,206,545 instructions # 1.47 insn per cycle + 0.769521849 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.067138e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.080898e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.081071e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 -TOTAL : 3.970325 sec -INFO: No Floating Point Exceptions have been reported - 13,404,801,231 cycles:u # 3.359 GHz (74.86%) - 2,944,990 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.15%) - 5,995,644 stalled-cycles-backend:u # 0.04% backend cycles idle (75.17%) - 12,191,388,620 instructions:u # 0.91 insn per cycle - # 0.00 stalled cycles per insn (75.08%) - 4.031650894 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.130872e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.191030e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.193752e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.801872 sec +INFO: No Floating Point Exceptions have been reported + 5,918,779,581 cycles # 2.909 GHz + 12,693,441,452 instructions # 2.14 insn per cycle + 2.093566853 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025600481842E-004 -Relative difference = 4.022433151864302e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 
OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.705369e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.706685e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.706685e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.068861 sec -INFO: No Floating Point Exceptions have been reported - 21,067,114,413 cycles:u # 3.470 GHz (74.97%) - 1,636,155 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.97%) - 2,737,208,964 stalled-cycles-backend:u # 12.99% backend cycles idle (74.97%) - 78,062,863,314 instructions:u # 3.71 insn per cycle - # 0.04 stalled cycles per insn (74.98%) - 6.076375494 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.942960e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.943920e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.943920e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.446218 sec +INFO: No Floating Point Exceptions have been reported + 24,891,970,806 cycles # 2.946 GHz + 79,110,184,615 instructions # 3.18 insn per cycle + 8.450517031 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274868816393329E-004 -Relative difference = 1.7859056895059718e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863312764526E-004 +Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.073506e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.075556e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.075556e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.534629 sec -INFO: No Floating Point Exceptions have been reported - 5,291,209,026 cycles:u # 3.441 GHz (75.04%) - 208,389 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.03%) - 729,565,068 stalled-cycles-backend:u # 13.79% backend cycles idle (75.03%) - 20,300,438,713 instructions:u # 3.84 insn per cycle - # 0.04 stalled cycles per insn (75.03%) - 1.575316034 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.000853e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.014105e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.014105e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.347412 sec +INFO: No Floating Point Exceptions have been reported + 6,535,913,878 cycles # 2.780 GHz + 20,270,850,285 instructions # 3.10 insn per cycle + 
2.351723425 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274847398845038E-004 -Relative difference = 3.924799464139408e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861442972011E-004 +Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.386743e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.396709e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.396709e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.703117 sec -INFO: No Floating Point Exceptions have been reported - 2,403,118,888 cycles:u # 3.444 GHz (74.96%) - 168,146 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.84%) - 213,761,143 stalled-cycles-backend:u # 8.90% backend cycles idle (74.89%) - 7,023,703,794 instructions:u # 2.92 insn per cycle - # 0.03 stalled cycles per insn (74.89%) - 0.751204545 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.599290e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.605892e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.605892e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.030594 sec +INFO: No Floating Point Exceptions have been reported + 2,836,963,276 cycles # 2.743 GHz + 7,065,994,832 instructions # 2.49 insn per cycle + 
1.034860296 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271946993158581E-004 -Relative difference = 4.537125319208525e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.795295e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.803482e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.803482e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.918514 sec +INFO: No Floating Point Exceptions have been reported + 2,528,652,589 cycles # 2.743 GHz + 6,403,959,518 instructions # 2.53 insn per cycle + 0.922696206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.410082e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.415209e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.415209e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.168400 sec +INFO: No Floating 
Point Exceptions have been reported + 2,072,435,771 cycles # 1.768 GHz + 3,304,546,208 instructions # 1.59 insn per cycle + 1.172720772 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779718007E-004 +Relative difference = 4.194411063934945e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index fb46fc2cdd..c66db7ae78 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,77 +1,97 @@ 
-Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_18:07:41 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:55:35 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.934278e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.028896e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.028896e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.206052e-01 +- 3.252639e-01 ) GeV^-4 -TOTAL : 0.542710 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,531,785,718 cycles:u # 2.842 GHz (72.77%) - 3,064,573 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.47%) - 76,194,677 stalled-cycles-backend:u # 4.97% backend cycles idle (76.32%) - 1,782,357,687 instructions:u # 1.16 insn per cycle - # 0.04 stalled cycles per insn (76.30%) - 0.592592082 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.970193e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.498612e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.498612e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.479926 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,011,379,748 cycles # 2.884 GHz + 3,038,247,862 instructions # 1.51 insn per cycle + 0.753810180 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.923948e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.119756e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.119756e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.183967e+02 +- 1.165669e+02 ) GeV^-4 -TOTAL : 5.393986 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 17,913,453,356 cycles:u # 3.304 GHz (74.92%) - 30,388,586 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.06%) - 2,151,686,487 stalled-cycles-backend:u # 12.01% backend cycles idle (75.09%) - 14,392,276,960 instructions:u # 0.80 insn per cycle - # 0.15 stalled cycles per insn (74.98%) - 5.462019383 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.940879e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.083233e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.083233e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 +TOTAL : 1.970426 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,440,275,548 cycles # 2.913 GHz + 13,287,281,132 instructions # 2.06 insn per cycle + 2.267959957 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -79,36 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025600481842E-004 -Relative difference = 4.022433151864302e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.670433e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.671622e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.671622e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.149800 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 21,343,747,009 cycles:u # 3.469 GHz (74.98%) - 18,868,954 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.03%) - 2,731,556,155 stalled-cycles-backend:u # 12.80% backend cycles idle (75.04%) - 78,007,569,798 instructions:u # 3.65 insn per cycle - # 0.04 stalled cycles per insn (75.04%) - 6.157496432 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.936854e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.937817e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.937817e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.475178 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 24,927,059,080 cycles # 2.940 GHz + 79,118,119,354 instructions # 3.17 insn per cycle + 8.479535627 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 
3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -116,36 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274868816393329E-004 -Relative difference = 1.7859056895059718e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863312764526E-004 +Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.073610e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.075603e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.075603e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.536410 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,300,583,999 cycles:u # 3.443 GHz (74.86%) - 468,795 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.07%) - 677,533,091 stalled-cycles-backend:u # 12.78% backend cycles idle (75.06%) - 20,323,599,590 instructions:u # 3.83 insn per cycle - # 0.03 stalled cycles per insn (75.06%) - 1.544342366 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.002962e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.015509e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.015509e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.349551 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been 
reported + 6,544,667,804 cycles # 2.781 GHz + 20,279,974,113 instructions # 3.10 insn per cycle + 2.353974711 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -153,36 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274847398845038E-004 -Relative difference = 3.924799464139408e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861442972011E-004 +Relative 
difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.383967e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.393992e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.393992e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.696778 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,387,404,039 cycles:u # 3.412 GHz (74.86%) - 231,790 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.86%) - 221,182,116 stalled-cycles-backend:u # 9.26% backend cycles idle (74.86%) - 7,014,055,965 instructions:u # 2.94 insn per cycle - # 0.03 stalled cycles per insn (74.86%) - 0.704363424 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.603853e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.610574e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.610574e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 
2.367611e+00 ) GeV^-4 +TOTAL : 1.030576 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,847,456,615 cycles # 2.753 GHz + 7,075,989,633 instructions # 2.49 insn per cycle + 1.035024707 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -190,16 +203,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271946993158581E-004 -Relative difference = 4.537125319208525e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.785349e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.793696e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.793696e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.926691 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,540,934,134 cycles # 2.731 GHz + 6,413,438,200 instructions # 2.52 insn per cycle + 0.931148836 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.400821e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.405962e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.405962e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.179178 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,081,047,712 cycles # 1.760 GHz + 3,314,864,763 instructions # 1.59 insn per cycle + 1.183503546 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779718007E-004 +Relative difference = 4.194411063934945e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 42df23ca66..3aa8ed158e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_18:12:31 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:06:34 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.900910e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.043919e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.044218e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.205840e-01 +- 3.252482e-01 ) GeV^-4 -TOTAL : 0.510575 sec -INFO: No Floating Point Exceptions have been reported - 1,486,205,403 cycles:u # 2.824 GHz (76.02%) - 2,946,687 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.79%) - 76,773,959 stalled-cycles-backend:u # 5.17% backend 
cycles idle (75.45%) - 1,853,207,947 instructions:u # 1.25 insn per cycle - # 0.04 stalled cycles per insn (75.11%) - 0.556110538 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.472678e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.513045e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.517154e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.159396e-01 +- 3.238803e-01 ) GeV^-4 +TOTAL : 0.480872 sec +INFO: No Floating Point Exceptions have been reported + 2,012,103,314 cycles # 2.880 GHz + 2,956,061,319 instructions # 1.47 insn per cycle + 0.756135044 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.084290e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.100734e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.100914e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.183835e+02 +- 1.165669e+02 ) GeV^-4 -TOTAL : 5.241201 sec 
-INFO: No Floating Point Exceptions have been reported - 17,557,459,247 cycles:u # 3.336 GHz (75.14%) - 19,889,271 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.08%) - 6,675,796 stalled-cycles-backend:u # 0.04% backend cycles idle (75.08%) - 13,522,683,695 instructions:u # 0.77 insn per cycle - # 0.00 stalled cycles per insn (74.96%) - 5.297307713 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.032542e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.093526e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.096446e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 +TOTAL : 1.887302 sec +INFO: No Floating Point Exceptions have been reported + 6,151,892,700 cycles # 2.911 GHz + 12,903,540,079 instructions # 2.10 insn per cycle + 2.177167582 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025600481842E-004 -Relative difference = 4.022433151864302e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.621454e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.622759e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.622759e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.262909 sec -INFO: No Floating Point Exceptions have been reported - 21,174,605,451 cycles:u # 3.380 GHz (74.97%) - 16,537,883 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.97%) - 2,687,542,026 stalled-cycles-backend:u # 12.69% backend cycles idle (74.97%) - 78,051,863,948 instructions:u # 3.69 insn per cycle - # 0.03 stalled cycles per insn (74.97%) - 6.266907844 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.942290e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.943248e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.943248e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 8.449460 sec +INFO: No Floating Point Exceptions have been reported + 24,927,677,850 cycles # 2.949 GHz + 79,113,674,015 instructions # 3.17 insn per cycle + 8.453509271 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274868816393329E-004 -Relative difference = 1.7859056895059718e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863312764526E-004 +Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.063819e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.065973e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.065973e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.548640 sec -INFO: No Floating Point Exceptions have been reported - 5,278,056,370 cycles:u # 3.404 GHz (74.72%) - 682,875 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.79%) - 683,912,947 stalled-cycles-backend:u # 12.96% backend cycles idle (75.05%) - 20,308,587,082 instructions:u # 3.85 insn per cycle - # 0.03 stalled cycles per insn (75.24%) - 1.552537130 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.966325e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.979405e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.979405e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 +TOTAL : 2.360667 sec +INFO: No Floating Point Exceptions have been reported + 6,536,812,483 cycles # 2.766 GHz + 20,271,244,947 instructions # 3.10 insn per cycle + 2.364721005 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274847398845038E-004 -Relative difference = 3.924799464139408e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861442972011E-004 +Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.374945e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.385404e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.385404e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.697351 sec -INFO: No Floating Point Exceptions have been reported - 2,403,349,263 cycles:u # 3.436 GHz (74.85%) - 1,567,151 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.84%) - 219,778,799 stalled-cycles-backend:u # 9.14% backend cycles idle (74.84%) - 7,044,721,523 instructions:u # 2.93 insn per cycle - # 0.03 stalled cycles per insn (74.84%) - 0.701360541 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.594039e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.600583e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.600583e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 1.034746 sec +INFO: No Floating Point Exceptions have been reported + 2,840,398,673 cycles # 2.736 GHz + 7,064,163,701 instructions # 2.49 insn per cycle + 1.038926233 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271946993158581E-004 -Relative difference = 4.537125319208525e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.789304e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.797829e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.797829e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.922641 sec +INFO: No Floating Point Exceptions have been reported + 2,530,877,890 cycles # 2.733 GHz + 6,400,607,448 instructions # 2.53 insn per cycle + 0.926747674 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.398241e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.403280e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.403280e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 1.179849 sec +INFO: No 
Floating Point Exceptions have been reported + 2,072,557,863 cycles # 1.752 GHz + 3,302,114,927 instructions # 1.59 insn per cycle + 1.183970001 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779718007E-004 +Relative difference = 4.194411063934945e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 1a7909a978..7797c46a19 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,69 
+1,86 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_18:10:42 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:00:55 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.942949e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.033746e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.034076e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.206052e-01 +- 3.252639e-01 ) GeV^-4 -TOTAL : 0.506941 sec -INFO: No Floating Point Exceptions have been reported - 1,520,307,429 cycles:u # 2.882 GHz (75.27%) - 3,258,324 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.51%) - 68,532,209 stalled-cycles-backend:u # 4.51% backend cycles idle (74.25%) - 1,858,236,746 instructions:u # 1.22 insn per cycle - # 0.04 stalled cycles per insn (73.66%) - 0.556529800 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.992477e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.494287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.498231e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.481963 sec +INFO: No Floating Point Exceptions have been reported + 1,997,965,324 cycles # 2.853 GHz + 2,939,834,102 instructions # 1.47 insn per cycle + 0.757193064 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.914615e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.094958e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.095133e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.183967e+02 +- 1.165669e+02 ) GeV^-4 -TOTAL : 5.333640 sec -INFO: No Floating Point Exceptions have been reported - 18,043,629,136 cycles:u # 3.366 GHz (75.08%) - 30,079,050 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.09%) - 2,134,096,420 stalled-cycles-backend:u # 11.83% backend cycles idle (75.01%) - 14,503,297,107 instructions:u # 0.80 insn per cycle - # 0.15 stalled cycles per insn (75.03%) - 5.389006417 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.118190e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
8.192549e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.195361e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 +TOTAL : 1.905617 sec +INFO: No Floating Point Exceptions have been reported + 6,204,653,970 cycles # 2.904 GHz + 11,932,036,366 instructions # 1.92 insn per cycle + 2.194579719 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -71,34 +88,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025600481842E-004 -Relative difference = 4.022433151864302e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.658893e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.660119e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.660119e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.174952 sec -INFO: No Floating Point Exceptions have been reported - 21,311,188,928 cycles:u # 3.450 GHz (75.00%) - 17,843,226 stalled-cycles-frontend:u # 0.08% frontend cycles idle (75.00%) - 2,713,676,089 stalled-cycles-backend:u # 12.73% backend cycles idle (75.00%) - 78,004,437,887 instructions:u # 3.66 insn per cycle - # 0.03 stalled cycles per 
insn (75.00%) - 6.178866187 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.934899e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.935850e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.935850e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.481229 sec +INFO: No Floating Point Exceptions have been reported + 24,933,908,474 cycles # 2.939 GHz + 79,109,779,876 instructions # 3.17 insn per cycle + 8.485474778 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -106,34 +122,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274868816393329E-004 -Relative difference = 1.7859056895059718e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863312764526E-004 +Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.037398e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.039470e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.039470e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.587755 sec -INFO: No Floating Point Exceptions have been reported - 5,288,866,019 cycles:u # 3.327 GHz (74.84%) - 227,676 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.84%) - 668,093,552 stalled-cycles-backend:u # 12.63% backend cycles idle (74.84%) - 20,316,507,500 instructions:u # 3.84 insn per cycle - # 0.03 stalled cycles per insn (74.93%) - 1.591582006 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.954399e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.967143e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.967143e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.363281 sec +INFO: No Floating Point Exceptions have been reported + 6,535,222,026 cycles # 2.761 GHz + 20,271,091,445 instructions # 3.10 insn per cycle + 2.367564480 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -141,34 +154,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274847398845038E-004 -Relative difference = 3.924799464139408e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861442972011E-004 +Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.330008e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.340797e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.340797e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.710511 sec -INFO: No Floating Point Exceptions have been reported - 2,380,119,927 cycles:u # 3.341 GHz (75.30%) - 557,625 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.30%) - 217,294,961 stalled-cycles-backend:u # 9.13% backend cycles idle (75.30%) - 7,015,459,296 instructions:u # 2.95 insn per cycle - # 0.03 stalled cycles per insn (75.30%) - 0.714337492 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.592187e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.598658e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.598658e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.035113 sec +INFO: No Floating Point Exceptions have been reported + 2,837,322,925 cycles # 2.732 GHz + 7,065,851,947 instructions # 2.49 insn per cycle + 1.039614272 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,16 +186,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271946993158581E-004 -Relative difference = 4.537125319208525e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.786472e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.794657e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.794657e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.923192 sec +INFO: No Floating Point Exceptions have been reported + 2,528,197,649 cycles # 2.730 GHz + 6,403,497,083 instructions # 2.53 insn per cycle + 0.927414591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.394144e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.399234e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.399234e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.181792 sec +INFO: No 
Floating Point Exceptions have been reported + 2,068,985,618 cycles # 1.745 GHz + 3,303,850,767 instructions # 1.60 insn per cycle + 1.186123644 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779718007E-004 +Relative difference = 4.194411063934945e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index d479d256f1..9b731718b7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_17:17:03 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:17:58 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.891386e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.024659e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.024962e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 -TOTAL : 0.491872 sec -INFO: No Floating Point Exceptions have been reported - 1,367,931,450 cycles:u # 2.716 GHz (76.22%) - 2,444,009 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.88%) - 7,290,356 stalled-cycles-backend:u # 0.53% backend cycles idle (75.72%) - 1,873,529,608 instructions:u # 1.37 insn per cycle - # 0.00 stalled cycles per insn (74.64%) - 0.548452272 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.454590e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.492804e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.497193e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.495704 sec +INFO: No Floating Point Exceptions have been reported + 2,032,995,153 cycles # 2.848 GHz + 2,991,224,667 instructions # 1.47 insn per cycle + 0.774166376 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.088094e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.102024e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.102211e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 -TOTAL : 3.951404 sec -INFO: No Floating Point Exceptions have been reported - 13,133,485,956 cycles:u # 3.324 GHz (75.01%) - 3,056,975 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.01%) - 6,407,169 stalled-cycles-backend:u # 0.05% backend cycles idle (74.85%) - 12,133,816,753 instructions:u # 0.92 insn per cycle - # 0.00 stalled cycles per insn (74.85%) - 4.011802643 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.094149e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.154905e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.157610e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.805319 sec +INFO: No Floating Point Exceptions have been reported + 5,914,324,613 cycles # 2.902 GHz + 11,873,756,543 instructions # 2.01 insn per cycle + 2.096430893 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025600481842E-004 -Relative difference = 4.022433151864302e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 
OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.709215e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.710598e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.710598e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.059935 sec -INFO: No Floating Point Exceptions have been reported - 21,003,004,134 cycles:u # 3.465 GHz (74.93%) - 1,153,390 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.95%) - 2,724,239,552 stalled-cycles-backend:u # 12.97% backend cycles idle (75.02%) - 77,982,313,980 instructions:u # 3.71 insn per cycle - # 0.03 stalled cycles per insn (75.06%) - 6.066960240 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1959) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.929536e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.930480e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.930480e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.504728 sec +INFO: No Floating Point Exceptions have been reported + 25,015,654,943 cycles # 2.941 GHz + 78,847,702,433 instructions # 3.15 insn per cycle + 8.508857223 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274868874222764E-004 -Relative difference = 1.698648731198014e-08 +Avg ME (F77/C++) = 6.6274866250177339E-004 +Relative difference = 5.65798569465384e-08 OK (relative difference <= 5E-3) ========================================================================= 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.081042e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.083133e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.083133e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.523801 sec -INFO: No Floating Point Exceptions have been reported - 5,298,511,047 cycles:u # 3.471 GHz (74.70%) - 212,156 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.85%) - 741,731,752 stalled-cycles-backend:u # 14.00% backend cycles idle (74.85%) - 20,314,215,061 instructions:u # 3.83 insn per cycle - # 0.04 stalled cycles per insn (74.88%) - 1.530800363 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.178831e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.192718e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.192718e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.289338 sec +INFO: No Floating Point Exceptions have been reported + 6,463,318,702 cycles # 2.819 GHz + 20,229,880,790 instructions # 3.13 insn per cycle + 2.293529801 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:13491) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274847398845038E-004 -Relative difference = 3.924799464139408e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861448331612E-004 +Relative difference = 2.1853408865157068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.387867e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.398246e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.398246e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.708932 sec -INFO: No Floating Point Exceptions have been reported - 2,404,304,951 cycles:u # 3.447 GHz (74.71%) - 164,227 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.85%) - 257,148,789 stalled-cycles-backend:u # 10.70% backend cycles idle (74.85%) - 7,021,443,847 instructions:u # 2.92 insn per cycle - # 0.04 stalled cycles per insn (74.78%) - 0.766054321 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10773) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.520587e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.526569e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.526569e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.083432 sec +INFO: No Floating Point Exceptions have been reported + 2,984,403,957 cycles # 2.746 GHz + 7,207,167,499 instructions # 2.41 insn per cycle + 1.087697042 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2:12437) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271946993158581E-004 -Relative difference = 4.537125319208525e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271939668088170E-004 +Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.733677e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.741677e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.741677e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.950819 sec +INFO: No Floating Point Exceptions have been reported + 2,611,989,316 cycles # 2.737 GHz + 6,545,448,351 instructions # 2.51 insn per cycle + 0.954971597 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11449) (512y: 27) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271939668088170E-004 +Relative difference = 5.008331292535666e-09 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.366907e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.371833e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.371833e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.204851 sec +INFO: No Floating 
Point Exceptions have been reported + 2,138,789,905 cycles # 1.770 GHz + 3,461,611,954 instructions # 1.62 insn per cycle + 1.209183599 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3037) (512y: 25) (512z: 9677) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952032316561E-004 +Relative difference = 3.066631594207157e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index cd1148f688..2cbba9e698 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_17:59:30 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:47:39 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.913343e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.049266e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.049574e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 -TOTAL : 0.488690 sec -INFO: No Floating Point Exceptions have been reported - 1,394,377,530 cycles:u # 2.786 GHz (75.85%) - 2,538,433 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.30%) - 5,232,172 stalled-cycles-backend:u # 0.38% backend cycles idle (75.35%) - 1,817,251,691 instructions:u # 1.30 insn per cycle - # 0.00 stalled cycles per insn (76.01%) - 0.543742736 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.579593e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.616784e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.620542e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.508462 sec +INFO: No Floating Point Exceptions have been reported + 2,050,083,222 cycles # 2.848 GHz + 2,995,129,166 instructions # 1.46 insn per cycle + 0.787145254 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.056819e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.070234e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.070408e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 -TOTAL : 4.103063 sec -INFO: No Floating Point Exceptions have been reported - 13,399,052,407 cycles:u # 3.351 GHz (75.07%) - 3,050,420 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.01%) - 7,952,892 stalled-cycles-backend:u # 0.06% backend cycles idle (75.00%) - 12,253,881,436 instructions:u # 0.91 insn per cycle - # 0.00 stalled cycles per insn (74.87%) - 4.159583942 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.605413e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.675177e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.678183e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.737190 sec +INFO: No Floating Point Exceptions have been reported + 5,761,752,713 cycles # 2.921 GHz + 12,131,218,179 instructions # 2.11 insn per cycle + 2.028782459 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025600481842E-004 -Relative difference = 4.022433151864302e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262669162351490E-004 +Relative difference = 2.8232862531213374e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 
OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.770941e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.771498e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.771498e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.204931e-01 +- 3.252405e-01 ) GeV^-4 -TOTAL : 28.425558 sec -INFO: No Floating Point Exceptions have been reported - 98,567,509,459 cycles:u # 3.467 GHz (74.99%) - 298,641,860 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.98%) - 5,269,027,296 stalled-cycles-backend:u # 5.35% backend cycles idle (75.00%) - 132,438,029,653 instructions:u # 1.34 insn per cycle - # 0.04 stalled cycles per insn (75.01%) - 28.433002977 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:17007) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.602317e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.603102e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.603102e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 29.279245 sec +INFO: No Floating Point Exceptions have been reported + 85,920,999,170 cycles # 2.934 GHz + 135,650,935,446 instructions # 1.58 insn per cycle + 
29.283501695 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275346655336742E-004 -Relative difference = 5.0466172741879477e-08 +Avg ME (F77/C++) = 6.6275349717465765E-004 +Relative difference = 4.26303654465793e-09 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.145293e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.156991e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.156991e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 2.020277 sec -INFO: No Floating Point Exceptions have been reported - 7,031,832,622 cycles:u # 3.476 GHz (74.85%) - 4,625,174 stalled-cycles-frontend:u # 0.07% frontend cycles idle (75.03%) - 3,108,170,327 stalled-cycles-backend:u # 44.20% backend cycles idle (75.09%) - 19,163,359,990 instructions:u # 2.73 insn per cycle - # 0.16 stalled cycles per insn (75.09%) - 2.027504768 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69115) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.859267e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.871489e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.871489e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.395975 sec +INFO: No Floating Point Exceptions have been reported + 6,767,487,912 cycles # 2.821 GHz + 19,352,953,840 instructions # 2.86 insn per 
cycle + 2.400276342 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69577) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274857190509046E-004 -Relative difference = 4.239150340994169e-08 +Avg ME (F77/C++) = 6.6274862748188362E-004 +Relative difference = 4.14665283800746e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.464625e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.468364e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.468364e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 1.126513 sec -INFO: No Floating Point Exceptions have been reported - 3,904,723,679 cycles:u # 3.458 GHz (74.88%) - 357,168 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.18%) - 2,244,634,890 stalled-cycles-backend:u # 57.49% backend cycles idle (75.21%) - 6,704,968,345 instructions:u # 1.72 insn per cycle - # 0.33 stalled cycles per insn (75.21%) - 1.133564716 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:48510) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.427993e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.433168e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.433168e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.153582 sec +INFO: No Floating Point Exceptions have been reported + 3,172,176,609 cycles # 2.741 GHz + 6,794,912,676 instructions # 2.14 insn per cycle + 
1.157865028 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49034) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627274e-04 -Avg ME (F77/C++) = 6.6272735727803539E-004 -Relative difference = 6.446385744398604e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731568543797E-004 +Relative difference = 2.3668012430631962e-08 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.725737e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.733579e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.733579e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 0.955483 sec +INFO: No Floating Point Exceptions have been reported + 2,630,257,808 cycles # 2.742 GHz + 5,970,030,267 instructions # 2.27 insn per cycle + 0.959792623 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42602) (512y: 11) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731568543797E-004 +Relative difference = 2.3668012430631962e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.398705e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.403700e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.403700e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.177833 sec +INFO: No Floating 
Point Exceptions have been reported + 2,074,489,030 cycles # 1.756 GHz + 3,495,482,745 instructions # 1.68 insn per cycle + 1.182176144 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5208) (512y: 3) (512z:44858) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627275e-04 +Avg ME (F77/C++) = 6.6272750237027223E-004 +Relative difference = 3.5765412974815996e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index c0a0efd470..307c9cbde7 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_18:00:14 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:48:29 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.889528e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.021901e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.022220e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 -TOTAL : 0.487499 sec -INFO: No Floating Point Exceptions have been reported - 1,399,710,931 cycles:u # 2.802 GHz (74.84%) - 2,534,919 stalled-cycles-frontend:u # 0.18% frontend cycles idle (73.85%) - 5,613,558 stalled-cycles-backend:u # 0.40% backend cycles idle (75.39%) - 1,783,832,893 instructions:u # 1.27 insn per cycle - # 0.00 stalled cycles per insn (76.12%) - 0.541982713 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.556326e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.594247e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.598112e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.495550 sec +INFO: No Floating Point Exceptions have been reported + 2,046,506,588 cycles # 2.866 GHz + 3,036,453,126 instructions # 1.48 insn per cycle + 0.773976715 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.122721e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.137313e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.137491e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 -TOTAL : 3.896901 sec -INFO: No Floating Point Exceptions have been reported - 13,168,180,477 cycles:u # 3.362 GHz (74.89%) - 2,694,573 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.15%) - 6,900,160 stalled-cycles-backend:u # 0.05% backend cycles idle (75.20%) - 11,988,237,942 instructions:u # 0.91 insn per cycle - # 0.00 stalled cycles per insn (75.28%) - 3.955917360 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.676205e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.747820e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.750770e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.731569 sec +INFO: No Floating Point Exceptions have been reported + 5,750,101,661 cycles # 2.911 GHz + 12,015,194,090 instructions # 2.09 insn per cycle + 2.032327922 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025600481842E-004 -Relative difference = 4.022433151864302e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262669162351490E-004 +Relative difference = 2.8232862531213374e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 
OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.923477e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.924063e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.924063e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.204931e-01 +- 3.252405e-01 ) GeV^-4 -TOTAL : 27.692633 sec -INFO: No Floating Point Exceptions have been reported - 96,250,613,811 cycles:u # 3.476 GHz (74.99%) - 107,567,410 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.99%) - 6,145,471,263 stalled-cycles-backend:u # 6.38% backend cycles idle (74.99%) - 131,678,393,429 instructions:u # 1.37 insn per cycle - # 0.05 stalled cycles per insn (75.00%) - 27.700069041 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:16664) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.582687e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.583472e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.583472e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 29.381578 sec +INFO: No Floating Point Exceptions have been reported + 86,090,574,106 cycles # 2.930 GHz + 135,364,281,032 instructions # 1.57 insn per cycle + 
29.385785407 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15471) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275348053303901E-004 -Relative difference = 2.9372852846917734e-08 +Avg ME (F77/C++) = 6.6275349662128086E-004 +Relative difference = 5.098002770919431e-09 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.780732e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.791659e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.791659e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 2.114411 sec -INFO: No Floating Point Exceptions have been reported - 7,188,013,987 cycles:u # 3.395 GHz (74.96%) - 2,072,727 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.06%) - 3,171,264,483 stalled-cycles-backend:u # 44.12% backend cycles idle (75.06%) - 19,159,161,719 instructions:u # 2.67 insn per cycle - # 0.17 stalled cycles per insn (75.06%) - 2.121741568 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68769) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.781191e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.793019e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.793019e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.423420 sec +INFO: No Floating Point Exceptions have been reported + 6,852,713,563 cycles # 2.824 GHz + 19,471,819,479 instructions # 2.84 insn per 
cycle + 2.427762808 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69876) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274857155746575E-004 -Relative difference = 4.291602312495571e-08 +Avg ME (F77/C++) = 6.6274862799683282E-004 +Relative difference = 4.2243518621014775e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.472732e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.476503e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.476503e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 1.120177 sec -INFO: No Floating Point Exceptions have been reported - 3,830,725,464 cycles:u # 3.412 GHz (74.87%) - 182,458 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.07%) - 2,163,439,633 stalled-cycles-backend:u # 56.48% backend cycles idle (75.07%) - 6,640,488,588 instructions:u # 1.73 insn per cycle - # 0.33 stalled cycles per insn (75.07%) - 1.127441049 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47334) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.462291e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.467817e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.467817e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.126460 sec +INFO: No Floating Point Exceptions have been reported + 3,104,466,483 cycles # 2.747 GHz + 6,715,454,919 instructions # 2.16 insn per cycle + 
1.130606631 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47692) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627274e-04 -Avg ME (F77/C++) = 6.6272735712090414E-004 -Relative difference = 6.470095531024898e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731623419345E-004 +Relative difference = 2.449603850635964e-08 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.731919e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.740037e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.740037e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 0.951895 sec +INFO: No Floating Point Exceptions have been reported + 2,625,337,295 cycles # 2.748 GHz + 5,966,178,470 instructions # 2.27 insn per cycle + 0.956115789 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41858) (512y: 13) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731623419345E-004 +Relative difference = 2.449603850635964e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.400560e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.405624e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.405624e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.176100 sec +INFO: No Floating 
Point Exceptions have been reported + 2,074,048,907 cycles # 1.758 GHz + 3,487,720,369 instructions # 1.68 insn per cycle + 1.180409639 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4171) (512y: 4) (512z:44494) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627275e-04 +Avg ME (F77/C++) = 6.6272750247886592E-004 +Relative difference = 3.740400032174438e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index bb3fc679f3..9378c125b2 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_17:17:21 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:18:25 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.203568e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.256693e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.256851e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.559094 sec -INFO: No Floating Point Exceptions have been reported - 1,652,028,100 cycles:u # 2.893 GHz (74.78%) - 2,492,328 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.84%) - 5,430,257 stalled-cycles-backend:u # 0.33% backend cycles idle (75.24%) - 2,047,800,783 instructions:u # 1.24 insn per cycle - # 0.00 stalled cycles per insn (74.71%) - 0.612737111 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.318122e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.344688e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.346795e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.537854 sec +INFO: No Floating Point Exceptions have been reported + 2,221,101,860 cycles # 2.870 GHz + 3,456,789,338 instructions # 1.56 insn per cycle + 0.830636964 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.679174e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.684528e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.684629e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.702058 sec -INFO: No Floating Point Exceptions have been reported - 22,612,281,809 cycles:u # 3.371 GHz (74.82%) - 3,413,939 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.91%) - 7,403,546 stalled-cycles-backend:u # 0.03% backend cycles idle (75.07%) - 20,078,972,435 instructions:u # 0.89 insn per cycle - # 0.00 stalled cycles per insn (75.17%) - 6.769412191 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.135476e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.165199e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.166400e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.042010 sec +INFO: No Floating Point Exceptions have been reported + 9,635,962,932 cycles # 2.918 GHz + 21,731,646,939 instructions # 2.26 insn per cycle + 3.358385171 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.565308e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.566517e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.566517e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.401537 sec -INFO: No Floating Point Exceptions have been reported - 21,503,404,728 cycles:u # 3.358 GHz (75.02%) - 817,007 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.02%) - 2,712,543,238 stalled-cycles-backend:u # 12.61% backend cycles idle (75.02%) - 78,876,861,074 instructions:u # 3.67 insn per cycle - # 0.03 stalled cycles per insn (75.02%) - 6.408587192 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4817) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.865433e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.866327e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.866327e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.798260 sec +INFO: No Floating Point Exceptions have been reported + 25,923,427,719 cycles # 2.945 GHz + 79,426,669,152 instructions # 3.06 insn per cycle + 8.802604907 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 4775) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.463692e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.468812e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.468812e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.010165 sec -INFO: No Floating Point Exceptions have been reported - 10,388,779,314 cycles:u # 3.448 GHz (74.90%) - 3,190,734 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.04%) - 1,318,490,699 stalled-cycles-backend:u # 12.69% backend cycles idle (75.04%) - 38,688,631,929 instructions:u # 3.72 insn per cycle - # 0.03 stalled cycles per insn (75.04%) - 3.017230551 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12020) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.509753e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.512944e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.512944e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.679543 sec +INFO: No Floating Point Exceptions have been reported + 12,835,987,651 cycles # 2.741 GHz + 38,823,362,502 instructions # 3.02 insn per cycle + 4.683930656 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:13173) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 
OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.216759e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.219259e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.219259e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.356179 sec -INFO: No Floating Point Exceptions have been reported - 4,702,260,606 cycles:u # 3.460 GHz (74.87%) - 623,171 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.70%) - 440,406,312 stalled-cycles-backend:u # 9.37% backend cycles idle (74.59%) - 13,620,136,115 instructions:u # 2.90 insn per cycle - # 0.03 stalled cycles per insn (74.88%) - 1.363616004 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10261) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.042437e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.059866e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.059866e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.045674 sec +INFO: No Floating Point Exceptions have been reported + 5,599,505,022 cycles # 2.733 GHz + 13,616,194,882 instructions # 2.43 insn per cycle + 2.050016410 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11427) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276836E-004 -Relative difference = 2.9563428359824236e-07 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.300992e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.323362e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.323362e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.769579 sec +INFO: No Floating Point Exceptions have been reported + 4,864,538,423 cycles # 2.743 GHz + 12,294,521,282 instructions # 2.53 insn per cycle + 1.774039102 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10331) (512y: 80) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.972443e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.984642e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.984642e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.358911 sec +INFO: No Floating 
Point Exceptions have been reported + 4,168,866,472 cycles # 1.765 GHz + 6,393,098,618 instructions # 1.53 insn per cycle + 2.363390601 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1983) (512y: 92) (512z: 9360) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index 56d4d37ac1..032ee51884 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-18_17:17:44 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:18:59 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.218240e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.272928e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.273080e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.542918 sec -INFO: No Floating Point Exceptions have been reported - 1,567,973,610 cycles:u # 2.821 GHz (74.81%) - 2,593,051 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.60%) - 6,325,504 stalled-cycles-backend:u # 0.40% backend cycles idle (75.57%) - 2,043,128,897 instructions:u # 1.30 insn per cycle - # 0.00 stalled cycles per insn (75.02%) - 0.593353614 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.323949e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.349755e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.352036e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.534628 sec +INFO: No Floating Point Exceptions have been reported + 2,204,767,059 cycles # 2.871 GHz + 3,455,052,131 instructions # 1.57 insn per cycle + 0.826431777 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.691717e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.697156e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.697257e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.465394 sec -INFO: No Floating Point Exceptions have been reported - 21,957,831,405 cycles:u # 3.378 GHz (75.02%) - 3,245,067 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.09%) - 7,050,657 stalled-cycles-backend:u # 0.03% backend cycles idle (75.16%) - 19,596,656,251 instructions:u # 0.89 insn per cycle - # 0.00 stalled cycles per insn (75.14%) - 6.546179574 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.145238e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.175049e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176235e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.038517 sec +INFO: No Floating Point Exceptions have been reported + 9,654,182,964 cycles # 2.928 GHz + 20,172,707,879 instructions # 2.09 insn per cycle + 3.353606693 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.642658e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.643866e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.643866e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.214109 sec -INFO: No Floating Point Exceptions have been reported - 21,594,617,303 cycles:u # 3.474 GHz (74.92%) - 773,227 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.99%) - 2,926,370,471 stalled-cycles-backend:u # 13.55% backend cycles idle (75.04%) - 78,771,997,564 instructions:u # 3.65 insn per cycle - # 0.04 stalled cycles per insn (75.04%) - 6.221411052 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4763) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.861444e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.862342e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.862342e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.816790 sec +INFO: No Floating Point Exceptions have been reported + 25,987,801,849 cycles # 2.947 GHz + 79,452,087,213 instructions # 3.06 insn per cycle + 8.821027518 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 4431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.498994e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.504608e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.504608e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.990999 sec -INFO: No Floating Point Exceptions have been reported - 10,340,645,927 cycles:u # 3.454 GHz (74.89%) - 3,779,975 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.88%) - 1,345,814,664 stalled-cycles-backend:u # 13.01% backend cycles idle (74.90%) - 38,784,635,393 instructions:u # 3.75 insn per cycle - # 0.03 stalled cycles per insn (75.01%) - 2.997927612 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:11990) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.513306e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.516455e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.516455e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.674356 sec +INFO: No Floating Point Exceptions have been reported + 12,813,296,665 cycles # 2.739 GHz + 38,778,823,155 instructions # 3.03 insn per cycle + 4.678665662 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:12935) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 
OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.208487e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.210948e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.210948e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.365089 sec -INFO: No Floating Point Exceptions have been reported - 4,698,068,104 cycles:u # 3.435 GHz (74.90%) - 970,217 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.86%) - 438,810,652 stalled-cycles-backend:u # 9.34% backend cycles idle (74.85%) - 13,603,628,607 instructions:u # 2.90 insn per cycle - # 0.03 stalled cycles per insn (74.85%) - 1.371980486 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10235) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.042911e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.058963e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.058963e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.045213 sec +INFO: No Floating Point Exceptions have been reported + 5,589,546,199 cycles # 2.728 GHz + 13,732,854,665 instructions # 2.46 insn per cycle + 2.049788655 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11510) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276836E-004 -Relative difference = 2.9563428359824236e-07 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.106583e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.127720e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.127720e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.807350 sec +INFO: No Floating Point Exceptions have been reported + 4,955,573,408 cycles # 2.736 GHz + 12,423,027,135 instructions # 2.51 insn per cycle + 1.811880023 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10322) (512y: 240) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.875797e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.888202e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.888202e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.391557 sec +INFO: No Floating 
Point Exceptions have been reported + 4,183,217,410 cycles # 1.747 GHz + 6,495,987,121 instructions # 1.55 insn per cycle + 2.396041838 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1806) (512y: 190) (512z: 9358) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index b360bc4479..7ab313debd 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-18_17:18:23 +make: Nothing to be done for 'all'. + +DATE: 2024-09-18_12:20:52 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.053996e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.054389e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.054544e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.439529 sec +INFO: No Floating Point Exceptions have been reported + 8,096,284,346 cycles # 2.927 GHz + 17,063,420,790 instructions # 2.11 insn per cycle + 2.826206150 seconds time 
elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.238045e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.240055e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.240313e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 4.011196 sec +INFO: No Floating Point Exceptions have been reported + 12,704,613,289 cycles # 2.925 GHz + 30,115,204,727 instructions # 2.37 insn per cycle + 4.397191434 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.186105e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.186142e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.186142e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.469648 sec -INFO: No Floating Point Exceptions have been reported - 15,438,712,619 cycles:u # 3.463 GHz (74.97%) - 7,657,888 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.05%) - 1,562,103,760 stalled-cycles-backend:u # 10.12% backend cycles idle (75.06%) - 53,525,096,792 instructions:u # 3.47 insn per cycle - # 0.03 stalled cycles per insn (75.06%) - 4.476405884 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44571) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.572616e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.572823e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.572823e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.975845 sec +INFO: No Floating Point Exceptions have been reported + 19,035,417,803 cycles # 2.728 GHz + 53,904,235,908 instructions # 2.83 insn per cycle + 6.980238056 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.331650e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.331789e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.331789e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.268575 sec -INFO: No Floating Point Exceptions have been reported - 7,859,912,105 cycles:u # 3.461 GHz (74.99%) - 930,734 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.99%) - 761,875,838 stalled-cycles-backend:u # 9.69% backend cycles idle (74.99%) - 27,073,752,041 instructions:u # 3.44 insn per cycle - # 0.03 stalled cycles per insn (74.99%) - 2.275150161 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95842) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.590030e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.590126e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.590126e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.323749 sec +INFO: No Floating Point Exceptions have been reported + 9,780,563,101 cycles # 2.940 GHz + 27,151,089,688 instructions # 2.78 insn per cycle + 3.328023666 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.142642e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.143137e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.143137e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.028794 sec -INFO: No Floating Point Exceptions have been reported - 3,559,164,828 cycles:u # 3.451 GHz (74.59%) - 695,562 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.93%) - 279,310,261 stalled-cycles-backend:u # 7.85% backend cycles idle (75.19%) - 9,564,381,285 instructions:u # 2.69 insn per cycle - # 0.03 stalled cycles per insn (75.19%) - 1.035696149 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83781) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.385331e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.385742e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.385742e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.561733 sec +INFO: No Floating Point Exceptions have been reported + 4,266,182,969 cycles # 2.725 GHz + 9,590,975,871 instructions # 2.25 insn per cycle + 1.566018474 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285459444E-003 -Relative difference = 3.5163711246052657e-07 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.892057e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.892635e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.892635e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.358913 sec +INFO: No Floating Point Exceptions have been reported + 3,729,263,843 cycles # 2.737 GHz + 8,515,569,817 instructions # 2.28 insn per cycle + 1.363199183 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 90) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.395803e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.396338e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.396338e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.556955 sec +INFO: 
No Floating Point Exceptions have been reported + 2,698,860,839 cycles # 1.729 GHz + 4,282,343,065 instructions # 1.59 insn per cycle + 1.561500058 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2856) (512y: 102) (512z:79114) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 5ed64682b9..5983376983 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ 
b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,35 +19,96 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-18_18:08:00 +make: Nothing to be done for 'all'. + +DATE: 2024-09-18_12:56:01 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.052616e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.054381e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.054381e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.389054 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 7,904,157,038 cycles # 2.912 GHz + 16,771,352,323 instructions # 2.12 insn per cycle + 2.770325050 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.237194e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.272545e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.272545e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.987988 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 12,604,681,487 cycles # 2.919 GHz + 28,965,849,382 instructions # 2.30 insn per cycle + 4.373962640 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.177999e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.178036e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.178036e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.485152 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 15,438,451,647 cycles:u # 3.440 GHz (74.90%) - 8,146,011 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.98%) - 1,535,013,765 stalled-cycles-backend:u # 9.94% backend cycles idle (75.05%) - 53,481,620,838 instructions:u # 3.46 insn per cycle - # 0.03 stalled cycles per insn (75.05%) - 4.492997642 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44571) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.613542e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.613748e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.613748e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.936083 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 18,900,515,421 cycles # 2.724 GHz + 53,905,451,035 instructions # 2.85 insn per cycle + 6.940621858 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -55,36 +116,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.268926e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.269053e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.269053e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.327192 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 8,142,656,834 cycles:u # 3.495 GHz (74.89%) - 1,696,473 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.94%) - 871,992,317 stalled-cycles-backend:u # 10.71% backend cycles idle (74.94%) - 27,012,965,155 instructions:u # 3.32 insn per cycle - # 0.03 stalled cycles per insn (74.94%) - 2.334766690 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95842) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.538785e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.538876e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.538876e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.433615 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,052,781,401 cycles # 2.925 GHz + 27,153,872,228 instructions # 2.70 insn per cycle + 3.438126502 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -92,36 +150,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.154554e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.155050e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.155050e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.027621 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,597,273,520 cycles:u # 3.491 GHz (74.96%) - 2,850,406 stalled-cycles-frontend:u # 0.08% frontend cycles idle (75.16%) - 326,699,824 stalled-cycles-backend:u # 9.08% backend cycles idle (75.16%) - 9,558,618,447 instructions:u # 2.66 insn per cycle - # 0.03 stalled cycles per insn (75.16%) - 1.035578637 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83781) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.384986e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.385397e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.385397e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.561732 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,257,385,748 cycles # 2.719 GHz + 9,593,157,745 instructions # 2.25 insn per cycle + 1.566325188 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -129,16 +184,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285459444E-003 -Relative difference = 3.5163711246052657e-07 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.887075e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.887680e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.887680e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.360664 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,718,394,007 cycles # 2.725 GHz + 8,517,746,108 instructions # 2.29 insn per cycle + 1.365273931 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 90) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.422958e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.423581e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.423581e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.545411 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,703,115,511 cycles # 1.745 GHz + 4,284,711,505 instructions # 1.59 insn per cycle + 1.550234745 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2856) (512y: 102) (512z:79114) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 1fdd2acac3..6972883511 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-18_17:19:01 +make: Nothing to be done for 'all'. + +DATE: 2024-09-18_12:22:20 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.054893e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.055305e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.055482e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.442249 sec +INFO: No Floating Point Exceptions have been reported + 8,106,561,725 cycles # 2.931 GHz + 17,204,264,784 instructions # 2.12 insn per cycle + 2.825101828 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.195814e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.197984e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.198227e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 4.015071 sec +INFO: No Floating Point Exceptions have been reported + 12,724,131,626 cycles # 2.928 GHz + 29,969,146,046 instructions # 2.36 insn per cycle + 4.400441096 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.179822e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.179860e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.179860e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.476637 sec -INFO: No Floating Point Exceptions have been reported - 15,420,433,610 cycles:u # 3.443 GHz (75.00%) - 3,322,675 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.00%) - 1,629,605,946 stalled-cycles-backend:u # 10.57% backend cycles idle (75.00%) - 53,478,329,026 instructions:u # 3.47 insn per cycle - # 0.03 stalled cycles per insn (75.00%) - 4.483381301 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44484) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.111535e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.111769e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.111769e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.512090 sec +INFO: No Floating Point Exceptions have been reported + 18,865,192,455 cycles # 2.896 GHz + 53,932,477,912 instructions # 2.86 insn per cycle + 6.516216407 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32022) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.327075e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.327244e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.327244e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.315391 sec -INFO: No Floating Point Exceptions have been reported - 8,010,165,479 cycles:u # 3.455 GHz (74.85%) - 2,043,919 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.82%) - 796,468,646 stalled-cycles-backend:u # 9.94% backend cycles idle (74.89%) - 27,088,519,124 instructions:u # 3.38 insn per cycle - # 0.03 stalled cycles per insn (75.06%) - 2.323093595 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95581) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.566187e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.566277e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.566277e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.374379 sec +INFO: No Floating Point Exceptions have been reported + 9,914,343,626 cycles # 2.935 GHz + 27,131,823,716 instructions # 2.74 insn per cycle + 3.378885579 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96368) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.121290e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.121789e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.121789e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.032658 sec -INFO: No Floating Point Exceptions have been reported - 3,553,389,334 cycles:u # 3.432 GHz (74.53%) - 1,260,522 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.80%) - 317,263,372 stalled-cycles-backend:u # 8.93% backend cycles idle (75.17%) - 9,558,578,083 instructions:u # 2.69 insn per cycle - # 0.03 stalled cycles per insn (75.28%) - 1.039354410 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83752) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.354421e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.354826e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.354826e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.575572 sec +INFO: No Floating Point Exceptions have been reported + 4,301,534,798 cycles # 2.724 GHz + 9,586,207,937 instructions # 2.23 insn per cycle + 1.579825913 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84968) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285459444E-003 -Relative difference = 3.5163711246052657e-07 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.882229e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.882764e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.882764e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.361976 sec +INFO: No Floating Point Exceptions have been reported + 3,732,974,645 cycles # 2.734 GHz + 8,507,919,232 instructions # 2.28 insn per cycle + 1.366219448 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80632) (512y: 240) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.421560e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.422069e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.422069e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.546646 sec +INFO: 
No Floating Point Exceptions have been reported + 2,700,867,753 cycles # 1.742 GHz + 4,281,876,861 instructions # 1.59 insn per cycle + 1.551074701 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2693) (512y: 184) (512z:79098) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 076451c385..41f4336bf3 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,13 +1,13 @@ 
-Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-18_17:19:39 +make: Nothing to be done for 'all'. + +DATE: 2024-09-18_12:23:48 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.204897e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.205686e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.206019e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.744512 sec +INFO: No Floating 
Point Exceptions have been reported + 5,890,882,031 cycles # 2.919 GHz + 11,806,932,962 instructions # 2.00 insn per cycle + 2.074529782 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.136881e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.137530e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.137618e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 +TOTAL : 2.049597 sec +INFO: No Floating Point Exceptions have been reported + 6,759,095,385 cycles # 2.923 GHz + 14,845,205,038 instructions # 2.20 insn per cycle + 2.369358973 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849635e-03 +Avg ME (F77/GPU) = 9.8712451931260159E-003 +Relative difference = 0.0021940095370046923 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.080859e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.080881e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.080881e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 -TOTAL : 4.889914 sec -INFO: No Floating Point Exceptions have been reported - 16,954,036,007 cycles:u # 3.465 GHz (75.02%) - 102,686,449 stalled-cycles-frontend:u # 0.61% frontend cycles idle (74.99%) - 1,739,144,010 stalled-cycles-backend:u # 10.26% backend cycles idle (74.98%) - 54,211,433,935 instructions:u # 3.20 insn per cycle - # 0.03 stalled cycles per insn (74.98%) - 4.897013982 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:33073) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.543544e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.543805e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.543805e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.183077 sec +INFO: No Floating Point Exceptions have been reported + 18,161,151,116 cycles # 2.936 GHz + 53,910,939,698 instructions # 2.97 insn per cycle + 6.187519652 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855168e-03 -Avg ME (F77/C++) = 9.8551676614203575E-003 -Relative difference = 3.4355542366580335e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847961e-03 +Avg ME (F77/C++) = 9.8479612087551509E-003 +Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.827034e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.827460e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.827460e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 -TOTAL : 1.096823 sec -INFO: No Floating Point Exceptions have been reported - 3,748,016,979 cycles:u # 3.409 GHz (74.60%) - 686,539 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.94%) - 358,581,520 stalled-cycles-backend:u # 9.57% backend cycles idle (75.25%) - 13,752,008,336 instructions:u # 3.67 insn per cycle - # 0.03 stalled cycles per insn (75.27%) - 1.126413502 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95933) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.361492e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.361888e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.361888e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.573076 sec +INFO: No Floating Point Exceptions have been reported + 4,616,676,545 cycles # 2.928 GHz + 13,807,548,367 instructions # 2.99 insn per cycle + 1.577368513 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855164e-03 -Avg ME (F77/C++) = 9.8551639361110794E-003 -Relative difference = 6.48278610035626e-09 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896367235E-003 +Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.978506e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.980069e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.980069e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 -TOTAL : 0.531646 sec -INFO: No Floating Point Exceptions have been reported - 1,793,798,155 cycles:u # 3.357 GHz (74.56%) - 990,764 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.56%) - 176,945,664 stalled-cycles-backend:u # 9.86% backend cycles idle (74.33%) - 4,825,441,710 instructions:u # 2.69 insn per cycle - # 0.04 stalled cycles per insn (75.08%) - 0.538766048 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84347) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.784398e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.786227e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.786227e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.780447 sec +INFO: No Floating Point Exceptions have been reported + 2,130,555,516 cycles # 2.717 GHz + 4,837,275,089 instructions # 2.27 insn per cycle + 0.784743576 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.836478e-03 -Avg ME (F77/C++) = 9.8364784946823516E-003 -Relative difference = 5.0290597139820844e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091246E-003 +Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.634242e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.636553e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.636553e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.693797 sec +INFO: No Floating Point Exceptions have been reported + 1,903,490,036 cycles # 2.729 GHz + 4,291,225,209 instructions # 2.25 insn per cycle + 0.698112096 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81183) (512y: 45) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091246E-003 +Relative difference = 1.8588029579156084e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.885404e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.887629e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.887629e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.769903 sec +INFO: 
No Floating Point Exceptions have been reported + 1,354,371,935 cycles # 1.750 GHz + 2,162,822,545 instructions # 1.60 insn per cycle + 0.774469858 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3481) (512y: 45) (512z:79330) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982676284E-003 +Relative difference = 2.004124217057488e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 306fb8fe41..8d8b09449b 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ 
b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,35 +19,96 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-18_18:08:39 +make: Nothing to be done for 'all'. + +DATE: 2024-09-18_12:57:30 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.296128e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.300632e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.300632e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187093e-05 +- 9.825663e-06 ) GeV^-6 +TOTAL : 1.691925 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,650,162,983 cycles # 2.892 GHz + 11,596,549,862 instructions # 2.05 insn per cycle + 2.010258263 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.106225e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.117844e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.117844e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856440e-04 +- 8.331091e-05 ) GeV^-6 +TOTAL : 2.039868 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,704,150,880 cycles # 2.913 GHz + 14,933,981,007 instructions # 2.23 insn per cycle + 2.357689511 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849635e-03 +Avg ME (F77/GPU) = 9.8712451931260159E-003 +Relative difference = 0.0021940095370046923 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.089078e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.089099e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.089099e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 -TOTAL : 4.849175 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 16,844,001,251 cycles:u # 3.472 GHz (75.00%) - 101,836,746 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.95%) - 1,763,250,726 stalled-cycles-backend:u # 10.47% backend cycles idle (74.94%) - 54,132,335,107 instructions:u # 3.21 insn per cycle - # 0.03 stalled cycles per insn (74.94%) - 4.973728962 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:33073) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.476123e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.476381e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.476381e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.231080 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 18,168,605,946 cycles # 2.914 GHz + 53,913,151,543 instructions # 2.97 insn per cycle + 6.235604617 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -55,36 +116,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855168e-03 -Avg ME (F77/C++) = 9.8551676614203575E-003 -Relative difference = 3.4355542366580335e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847961e-03 +Avg ME (F77/C++) = 9.8479612087551509E-003 +Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.855881e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.856328e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.856328e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 -TOTAL : 1.089527 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,771,188,595 cycles:u # 3.452 GHz (75.05%) - 597,687 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.11%) - 332,206,250 stalled-cycles-backend:u # 8.81% backend cycles idle (75.11%) - 13,755,411,669 instructions:u # 3.65 insn per cycle - # 0.02 stalled cycles per insn (75.11%) - 1.097297671 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95933) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.367327e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.367745e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.367745e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.570359 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,609,204,013 cycles # 2.928 GHz + 13,810,618,137 instructions # 3.00 insn per cycle + 1.574904752 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -92,36 +150,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855164e-03 -Avg ME (F77/C++) = 9.8551639361110794E-003 -Relative difference = 6.48278610035626e-09 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896367235E-003 +Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) ========================================================================= 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.029580e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.029744e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.029744e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 -TOTAL : 0.515291 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,780,186,787 cycles:u # 3.435 GHz (75.01%) - 322,585 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.31%) - 157,439,754 stalled-cycles-backend:u # 8.84% backend cycles idle (75.31%) - 4,809,438,119 instructions:u # 2.70 insn per cycle - # 0.03 stalled cycles per insn (75.31%) - 0.523072659 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84347) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.813057e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.814753e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.814753e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.777334 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been 
reported + 2,130,492,369 cycles # 2.727 GHz + 4,838,939,909 instructions # 2.27 insn per cycle + 0.781848874 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -129,16 +184,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.836478e-03 -Avg ME (F77/C++) = 9.8364784946823516E-003 -Relative difference = 5.0290597139820844e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 
9.8929728161091246E-003 +Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.672152e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.674283e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.674283e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.690681 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,888,040,180 cycles # 2.718 GHz + 4,293,435,273 instructions # 2.27 insn per cycle + 0.695178892 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81183) (512y: 45) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091246E-003 +Relative difference = 1.8588029579156084e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.827455e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.829435e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.829435e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.776050 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,356,992,115 cycles # 1.740 GHz + 2,165,171,343 instructions # 1.60 insn per cycle + 0.780688696 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3481) (512y: 45) (512z:79330) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982676284E-003 +Relative difference = 2.004124217057488e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index d55898c9ff..43e4fd4779 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-18_17:20:11 +make: Nothing to be done for 'all'. + +DATE: 2024-09-18_12:24:51 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.195253e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.195989e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.196280e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.748628 sec +INFO: No Floating Point Exceptions have been reported + 5,866,556,695 cycles # 2.917 GHz + 12,565,857,650 instructions # 2.14 insn per cycle + 2.067294353 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.121566e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.122211e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.122327e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 +TOTAL : 2.053021 sec +INFO: No Floating Point Exceptions have been reported + 6,778,897,896 cycles # 2.924 GHz + 14,985,250,436 instructions # 2.21 insn per cycle + 2.374125707 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849635e-03 +Avg ME (F77/GPU) = 9.8712451931260107E-003 +Relative difference = 0.0021940095370041636 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.084126e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.084147e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.084147e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 -TOTAL : 4.875160 sec -INFO: No Floating Point Exceptions have been reported - 16,871,163,360 cycles:u # 3.459 GHz (74.93%) - 104,611,881 stalled-cycles-frontend:u # 0.62% frontend cycles idle (74.91%) - 1,736,496,978 stalled-cycles-backend:u # 10.29% backend cycles idle (74.94%) - 54,185,128,945 instructions:u # 3.21 insn per cycle - # 0.03 stalled cycles per insn (75.03%) - 4.882221401 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:33154) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.587070e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.587332e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.587332e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.151381 sec +INFO: No Floating Point Exceptions have been reported + 18,055,403,744 cycles # 2.934 GHz + 53,896,033,902 instructions # 2.99 insn per cycle + 6.155606485 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855168e-03 -Avg ME (F77/C++) = 9.8551676614199186E-003 -Relative difference = 3.435558690007174e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847961e-03 +Avg ME (F77/C++) = 9.8479612087572898E-003 +Relative difference = 2.1198021522715588e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.877298e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.877727e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.877727e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 -TOTAL : 1.085679 sec -INFO: No Floating Point Exceptions have been reported - 3,755,580,707 cycles:u # 3.451 GHz (75.08%) - 515,828 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 341,735,253 stalled-cycles-backend:u # 9.10% backend cycles idle (75.01%) - 13,749,402,021 instructions:u # 3.66 insn per cycle - # 0.02 stalled cycles per insn (75.01%) - 1.092576997 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95973) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.398632e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.399059e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.399059e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.555535 sec +INFO: No Floating Point Exceptions have been reported + 4,569,755,461 cycles # 2.931 GHz + 13,800,747,699 instructions # 3.02 insn per cycle + 1.559859354 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96651) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855164e-03 -Avg ME (F77/C++) = 9.8551639361110794E-003 -Relative difference = 6.48278610035626e-09 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896065809E-003 +Relative difference = 3.151856596628469e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.020454e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.020611e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.020611e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 -TOTAL : 0.519654 sec -INFO: No Floating Point Exceptions have been reported - 1,798,676,182 cycles:u # 3.444 GHz (74.57%) - 672,805 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.30%) - 155,964,300 stalled-cycles-backend:u # 8.67% backend cycles idle (75.50%) - 4,810,457,285 instructions:u # 2.67 insn per cycle - # 0.03 stalled cycles per insn (75.50%) - 0.526441967 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84309) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.803652e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.805665e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.805665e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.778546 sec +INFO: No Floating Point Exceptions have been reported + 2,147,523,686 cycles # 2.745 GHz + 4,840,927,245 instructions # 2.25 insn per cycle + 0.782889882 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85884) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.836478e-03 -Avg ME (F77/C++) = 9.8364784946823516E-003 -Relative difference = 5.0290597139820844e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091923E-003 +Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.693768e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.696106e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.696106e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.688038 sec +INFO: No Floating Point Exceptions have been reported + 1,894,736,849 cycles # 2.739 GHz + 4,295,025,191 instructions # 2.27 insn per cycle + 0.692237484 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81725) (512y: 25) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091923E-003 +Relative difference = 1.85880227405429e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.859865e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.862153e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.862153e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.772052 sec +INFO: No 
Floating Point Exceptions have been reported + 1,359,092,301 cycles # 1.753 GHz + 2,169,957,409 instructions # 1.60 insn per cycle + 0.776490041 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4092) (512y: 32) (512z:79551) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982957326E-003 +Relative difference = 2.0044082998332894e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index cfcc794bdd..e02407d644 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,13 +1,13 @@ 
-Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-18_17:20:43 +make: Nothing to be done for 'all'. + +DATE: 2024-09-18_12:25:53 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.664550e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.665186e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.665405e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.193141 sec +INFO: No Floating Point Exceptions have been reported + 7,365,717,542 cycles # 2.923 GHz + 16,291,118,073 instructions # 2.21 insn per cycle + 2.576836591 
seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.102923e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.103231e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103265e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.419785 sec +INFO: No Floating Point Exceptions have been reported + 10,963,927,138 cycles # 2.923 GHz + 24,861,261,596 instructions # 2.27 insn per cycle + 3.806537159 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.186093e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.186131e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.186131e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.453659 sec -INFO: No Floating Point Exceptions have been reported - 15,438,755,787 cycles:u # 3.465 GHz (74.98%) - 2,696,401 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.05%) - 1,605,984,207 stalled-cycles-backend:u # 10.40% backend cycles idle (75.05%) - 53,741,110,532 instructions:u # 3.48 insn per cycle - # 0.03 stalled cycles per insn (75.05%) - 4.464196839 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44590) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.500673e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.500867e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.500867e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 7.041188 sec +INFO: No Floating Point Exceptions have been reported + 19,221,485,171 cycles # 2.729 GHz + 54,134,690,618 instructions # 2.82 insn per cycle + 7.045507456 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32000) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.456635e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.456794e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.456794e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.152502 sec -INFO: No Floating Point Exceptions have been reported - 7,422,082,381 cycles:u # 3.442 GHz (74.98%) - 1,988,064 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.94%) - 816,635,761 stalled-cycles-backend:u # 11.00% backend cycles idle (74.81%) - 25,866,311,024 instructions:u # 3.49 insn per cycle - # 0.03 stalled cycles per insn (74.88%) - 2.207965851 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95377) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.537074e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.537163e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.537163e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.437220 sec +INFO: No Floating Point Exceptions have been reported + 9,396,080,919 cycles # 2.731 GHz + 26,188,082,836 instructions # 2.79 insn per cycle + 3.441517756 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96049) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.450735e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.451251e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.451251e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.970672 sec -INFO: No Floating Point Exceptions have been reported - 3,362,824,915 cycles:u # 3.455 GHz (74.57%) - 927,977 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.48%) - 306,236,549 stalled-cycles-backend:u # 9.11% backend cycles idle (74.89%) - 9,089,995,529 instructions:u # 2.70 insn per cycle - # 0.03 stalled cycles per insn (75.35%) - 0.977500128 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:82824) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.541134e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.541635e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.541635e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.493238 sec +INFO: No Floating Point Exceptions have been reported + 4,077,957,635 cycles # 2.724 GHz + 9,249,641,886 instructions # 2.27 insn per cycle + 1.497708781 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.136665e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.137271e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.137271e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.279311 sec +INFO: No Floating Point Exceptions have been reported + 3,517,339,720 cycles # 2.742 GHz + 8,183,228,052 instructions # 2.33 insn per cycle + 1.283633317 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80015) (512y: 80) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.501058e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.501647e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.501647e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.511075 sec +INFO: No Floating Point Exceptions have been reported + 2,666,286,599 cycles # 1.760 GHz + 4,173,044,119 instructions # 1.57 insn per cycle + 1.515586960 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2615) (512y: 92) (512z:78910) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 73a4cbfb60..59afbf5683 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-18_17:21:20 +make: Nothing to be done for 'all'. + +DATE: 2024-09-18_12:27:20 -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.673618e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.674137e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.674360e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.187725 sec +INFO: No Floating Point Exceptions have been reported + 7,320,649,548 cycles # 2.912 GHz + 16,262,382,237 instructions # 2.22 insn per cycle + 2.571114049 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.105826e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.106139e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106173e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.426824 sec +INFO: No Floating Point Exceptions have been reported + 11,001,686,020 cycles # 2.930 GHz + 25,147,468,300 instructions # 2.29 insn per cycle + 3.812692076 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.175682e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.175719e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.175719e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.492373 sec -INFO: No Floating Point Exceptions have been reported - 15,577,763,550 cycles:u # 3.466 GHz (74.92%) - 4,687,227 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.91%) - 1,702,139,874 stalled-cycles-backend:u # 10.93% backend cycles idle (74.93%) - 53,780,833,557 instructions:u # 3.45 insn per cycle - # 0.03 stalled cycles per insn (75.02%) - 4.498883454 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44515) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.043178e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.043403e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.043403e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.566514 sec +INFO: No Floating Point Exceptions have been reported + 19,176,347,779 cycles # 2.919 GHz + 54,156,968,111 instructions # 2.82 insn per cycle + 6.570813145 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32202) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.466705e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.466857e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.466857e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.141419 sec -INFO: No Floating Point Exceptions have been reported - 7,415,111,989 cycles:u # 3.459 GHz (75.01%) - 14,491,429 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.00%) - 753,324,685 stalled-cycles-backend:u # 10.16% backend cycles idle (75.00%) - 25,742,576,081 instructions:u # 3.47 insn per cycle - # 0.03 stalled cycles per insn (75.00%) - 2.147961105 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95039) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.555217e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.555303e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.555303e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.398105 sec +INFO: No Floating Point Exceptions have been reported + 9,273,027,189 cycles # 2.726 GHz + 26,087,136,722 instructions # 2.81 insn per cycle + 3.402445291 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95935) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.461169e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.461707e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.461707e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.968763 sec -INFO: No Floating Point Exceptions have been reported - 3,353,428,196 cycles:u # 3.452 GHz (74.49%) - 1,338,534 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.72%) - 289,176,814 stalled-cycles-backend:u # 8.62% backend cycles idle (75.13%) - 9,029,253,470 instructions:u # 2.69 insn per cycle - # 0.03 stalled cycles per insn (75.30%) - 0.975515131 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:82125) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.537227e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.537679e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.537679e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.494127 sec +INFO: No Floating Point Exceptions have been reported + 4,071,118,335 cycles # 2.719 GHz + 9,214,803,224 instructions # 2.26 insn per cycle + 1.498443184 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83864) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.138433e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.139090e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.139090e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.277732 sec +INFO: No Floating Point Exceptions have been reported + 3,507,535,748 cycles # 2.738 GHz + 8,168,319,774 instructions # 2.33 insn per cycle + 1.282049677 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79421) (512y: 230) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.543576e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.544114e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.544114e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.493048 sec +INFO: No Floating Point Exceptions have been reported + 2,621,670,941 cycles # 1.752 GHz + 4,167,760,475 instructions # 1.59 insn per cycle + 1.497511330 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1879) (512y: 174) (512z:78884) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 56928fe017..b0413f07b6 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,53 +1,258 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing 
to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-18_17:18:08 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:19:33 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault - 750,810,383 cycles:u # 2.201 GHz (73.28%) - 2,685,589 stalled-cycles-frontend:u # 0.36% frontend cycles idle (68.25%) - 6,438,177 stalled-cycles-backend:u # 0.86% backend cycles idle (72.32%) - 1,309,783,720 instructions:u # 1.74 insn per cycle - # 0.00 stalled cycles per insn (78.01%) - 0.392567056 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.879954e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.891707e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.001488e+07 ) sec^-1 
+MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.459044 sec +INFO: No Floating Point Exceptions have been reported + 1,939,663,698 cycles # 2.864 GHz + 2,747,739,655 instructions # 1.42 insn per cycle + 0.734387710 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.061438e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.512391e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.741979e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.541789 sec +INFO: No Floating Point Exceptions have been reported + 2,258,330,350 cycles # 2.885 GHz + 3,233,524,764 instructions # 1.43 insn per cycle + 0.842114758 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault - 945,127,956 cycles:u # 2.104 GHz (74.53%) - 2,369,108 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.38%) - 6,385,127 stalled-cycles-backend:u # 0.68% backend cycles idle (74.64%) - 1,559,615,497 instructions:u # 1.65 insn per cycle - # 0.00 stalled cycles per insn (73.60%) - 0.487906308 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.056900e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.079897e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.079897e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.570278 sec +INFO: No Floating Point Exceptions have been reported + 4,626,289,546 cycles # 2.939 GHz + 13,191,201,959 instructions # 2.85 insn per cycle + 1.574568894 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499481 +Relative difference = 5.286896511435107e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.877819e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.949205e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.949205e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.891875 sec +INFO: No Floating Point Exceptions have been reported + 2,638,327,743 cycles # 2.947 GHz + 7,555,209,951 instructions # 2.86 insn per cycle + 0.896114078 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499475 +Relative difference = 5.286896515331313e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.170773e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.377039e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.377039e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.536262 sec +INFO: No Floating Point Exceptions have been reported + 1,489,383,659 cycles # 2.759 GHz + 3,159,296,473 instructions # 2.12 insn per cycle + 0.540558254 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -ERROR! C++ calculation (C++/GPU) failed +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.529419e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.784986e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.784986e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.483576 sec +INFO: No Floating Point Exceptions have been reported + 1,345,705,641 cycles # 2.762 GHz + 3,013,816,668 instructions # 2.24 insn per cycle + 0.487835073 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2749) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.357874e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.470306e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.470306e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.716743 sec +INFO: No Floating Point Exceptions have been reported + 1,329,087,485 cycles # 1.845 GHz + 1,962,911,490 instructions # 1.48 insn per cycle + 0.721045759 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 3045dd42da..e338aa0c83 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,61 +1,282 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand 
+HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-18_18:07:10 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:54:34 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault - 788,352,573 cycles:u # 2.294 GHz (73.24%) - 3,031,600 stalled-cycles-frontend:u # 0.38% frontend cycles idle (69.20%) - 34,280,145 stalled-cycles-backend:u # 4.35% backend cycles idle (72.41%) - 1,272,941,365 instructions:u # 1.61 insn per cycle - # 0.03 stalled cycles per insn (77.37%) - 0.376352362 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.300988e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.591479e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.591479e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.483517 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,018,637,546 cycles # 2.889 GHz + 3,002,221,313 instructions # 1.49 insn per cycle + 0.755433693 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe: Segmentation fault - 2,987,537,745 cycles:u # 2.763 GHz (75.82%) - 16,562,938 stalled-cycles-frontend:u # 0.55% frontend cycles idle (75.58%) - 817,992,169 stalled-cycles-backend:u # 27.38% backend cycles idle (75.59%) - 3,167,893,164 instructions:u # 1.06 insn per cycle - # 0.26 stalled cycles per insn (75.61%) - 1.119291263 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.209513e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.250583e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.250583e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.757854 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,924,491,267 cycles # 2.893 GHz + 
4,472,331,439 instructions # 1.53 insn per cycle + 1.067931667 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.054217e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.077389e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.077389e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.581825 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,664,515,506 cycles # 2.942 GHz + 13,198,020,525 instructions # 2.83 insn per cycle + 1.586342613 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499481 +Relative difference = 5.286896511435107e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.861182e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934526e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934526e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.908066 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,683,422,373 cycles # 2.942 GHz + 7,604,693,273 instructions # 2.83 insn per cycle + 0.912668086 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499475 +Relative difference = 5.286896515331313e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.136463e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.344918e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.344918e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.550693 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,532,887,808 cycles # 2.763 GHz + 3,210,306,872 instructions # 2.09 insn per cycle + 0.555384102 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.483226e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.741231e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.741231e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.498787 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,390,412,454 cycles # 2.766 GHz + 3,064,189,434 instructions # 2.20 insn per cycle + 0.503409402 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2749) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.324425e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.438930e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.438930e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.734309 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,369,927,300 cycles # 1.856 GHz + 2,000,629,444 instructions # 1.46 insn per cycle + 0.738870915 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 -ERROR! C++ calculation (C++/GPU) failed +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 0a3aafbd7f..698af75849 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,53 +1,258 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-18_17:18:10 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:19:47 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault - 811,304,021 cycles:u # 2.416 GHz (70.69%) - 2,521,242 stalled-cycles-frontend:u # 0.31% frontend cycles idle (72.81%) - 6,643,430 stalled-cycles-backend:u # 0.82% backend cycles idle (76.33%) - 1,326,175,431 instructions:u # 1.63 insn per cycle - # 0.01 stalled cycles per insn (76.30%) - 0.374116903 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.870113e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.853917e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.966787e+07 ) sec^-1 
+MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.459366 sec +INFO: No Floating Point Exceptions have been reported + 1,939,416,729 cycles # 2.875 GHz + 2,719,660,225 instructions # 1.40 insn per cycle + 0.733418344 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.000378e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.373625e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.579737e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.544100 sec +INFO: No Floating Point Exceptions have been reported + 2,268,193,870 cycles # 2.866 GHz + 3,228,698,159 instructions # 1.42 insn per cycle + 0.849845463 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe: Segmentation fault - 951,602,277 cycles:u # 2.125 GHz (75.16%) - 2,406,140 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.11%) - 6,352,933 stalled-cycles-backend:u # 0.67% backend cycles idle (75.69%) - 1,523,514,711 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (74.26%) - 0.488589383 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.061980e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.085030e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.085030e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.562513 sec +INFO: No Floating Point Exceptions have been reported + 4,622,072,256 cycles # 2.951 GHz + 13,179,636,938 instructions # 2.85 insn per cycle + 1.566824554 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499481 +Relative difference = 5.286896511435107e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.876350e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.948368e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.948368e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.892202 sec +INFO: No Floating Point Exceptions have been reported + 2,639,628,239 cycles # 2.947 GHz + 7,552,826,806 instructions # 2.86 insn per cycle + 0.896585147 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467499475 +Relative difference = 5.286896515331313e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.183448e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.393646e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.393646e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.534141 sec +INFO: No Floating Point Exceptions have been reported + 1,491,163,611 cycles # 2.773 GHz + 3,158,625,928 instructions # 2.12 insn per cycle + 0.538404277 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2976) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 -ERROR! C++ calculation (C++/GPU) failed +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.492000e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.744364e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.744364e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.489086 sec +INFO: No Floating Point Exceptions have been reported + 1,346,762,343 cycles # 2.733 GHz + 3,011,186,186 instructions # 2.24 insn per cycle + 0.493386881 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2726) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.331076e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.442354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.442354e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.724419 sec +INFO: No Floating Point Exceptions have been reported + 1,327,007,586 cycles # 1.823 GHz + 1,960,723,409 instructions # 1.48 insn per cycle + 0.728619129 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1356) (512y: 106) (512z: 2218) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index e1363c40e9..8a6bb74f5e 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,53 +1,258 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-18_17:18:13 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:20:00 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault - 764,555,819 cycles:u # 2.292 GHz (74.60%) - 2,714,289 stalled-cycles-frontend:u # 0.36% frontend cycles idle (71.36%) - 6,306,367 stalled-cycles-backend:u # 0.82% backend cycles idle (72.97%) - 1,326,786,226 instructions:u # 1.74 insn per cycle - # 0.00 stalled cycles per insn (75.59%) - 0.376846597 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.830452e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.999598e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.147092e+07 ) sec^-1 
+MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.454704 sec +INFO: No Floating Point Exceptions have been reported + 1,916,653,758 cycles # 2.859 GHz + 2,706,744,679 instructions # 1.41 insn per cycle + 0.728210028 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.474236e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.587297e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.949656e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 +TOTAL : 0.490292 sec +INFO: No Floating Point Exceptions have been reported + 2,078,449,093 cycles # 2.886 GHz + 2,974,210,572 instructions # 1.43 insn per cycle + 0.777065481 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247487904286338 +Relative difference = 0.0003670698531228044 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault - 904,818,769 cycles:u # 2.159 GHz (75.09%) - 2,536,848 stalled-cycles-frontend:u # 0.28% frontend cycles idle (70.53%) - 6,504,319 stalled-cycles-backend:u # 0.72% backend cycles idle (71.17%) - 1,450,130,950 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (75.89%) - 0.458219922 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.100218e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.125308e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.125308e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.507385 sec +INFO: No Floating Point Exceptions have been reported + 4,410,101,975 cycles # 2.919 GHz + 12,953,085,822 instructions # 2.94 insn per cycle + 1.511568329 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246861273719524 +Relative difference = 8.940352641194861e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.885848e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.067058e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.067058e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.584913 sec +INFO: No Floating Point Exceptions have been reported + 1,727,797,245 cycles # 2.936 GHz + 4,541,987,860 instructions # 2.63 insn per cycle + 0.589023498 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246862329122401 +Relative difference = 1.6348320966878032e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.703055e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.396540e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.396540e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.305122 sec +INFO: No Floating Point Exceptions have been reported + 856,571,449 cycles # 2.776 GHz + 1,917,826,981 instructions # 2.24 insn per cycle + 0.309207440 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -ERROR! C++ calculation (C++/GPU) failed +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.972249e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.763699e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.763699e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.292353 sec +INFO: No Floating Point Exceptions have been reported + 806,013,891 cycles # 2.724 GHz + 1,834,284,908 instructions # 2.28 insn per cycle + 0.296525539 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3400) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.507099e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.952644e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.952644e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.383525 sec +INFO: No Floating Point Exceptions have been reported + 728,616,899 cycles # 1.883 GHz + 1,308,760,783 instructions # 1.80 insn per cycle + 0.387733440 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 24) (512z: 2435) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491576758442 +Relative difference = 1.1066920862943416e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index c373b3f7b1..a6b985fae9 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,61 +1,282 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand 
+HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-18_18:07:13 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:54:48 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault - 829,790,807 cycles:u # 2.462 GHz (71.01%) - 2,840,574 stalled-cycles-frontend:u # 0.34% frontend cycles idle (73.22%) - 25,476,620 stalled-cycles-backend:u # 3.07% backend cycles idle (75.10%) - 1,327,045,699 instructions:u # 1.60 insn per cycle - # 0.02 stalled cycles per insn (74.56%) - 0.445531606 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.927791e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.333519e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.333519e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.017654e+01 +- 1.429183e+01 ) GeV^-2 +TOTAL : 0.469351 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,981,672,619 cycles # 2.860 GHz + 2,838,745,918 instructions # 1.43 insn per cycle + 0.751805905 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe: Segmentation fault - 2,962,636,635 cycles:u # 2.883 GHz (74.00%) - 17,010,740 stalled-cycles-frontend:u # 0.57% frontend cycles idle (73.54%) - 830,632,643 stalled-cycles-backend:u # 28.04% backend cycles idle (74.38%) - 3,207,613,891 instructions:u # 1.08 insn per cycle - # 0.26 stalled cycles per insn (75.89%) - 1.062476529 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.989037e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.963677e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.963677e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.609941e+02 +- 2.115589e+02 ) GeV^-2 +TOTAL : 0.635748 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,515,384,416 cycles # 2.885 GHz + 
3,805,896,684 instructions # 1.51 insn per cycle + 0.928757267 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247487904286338 +Relative difference = 0.0003670698531228044 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.110811e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.136583e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.136583e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.496763 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,424,063,947 cycles # 2.949 GHz + 12,956,460,167 instructions # 2.93 insn per cycle + 1.501035221 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246861273719524 +Relative difference = 8.940352641194861e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.857646e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.036818e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.036818e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.595633 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,753,185,847 cycles # 2.926 GHz + 4,590,460,046 instructions # 2.62 insn per cycle + 0.599868062 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246862329122401 +Relative difference = 1.6348320966878032e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.498095e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.167392e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.167392e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.320525 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 879,877,577 cycles # 2.713 GHz + 1,955,191,246 instructions # 2.22 insn per cycle + 0.324936571 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.017893e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.823832e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.823832e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.294540 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 824,659,177 cycles # 2.764 GHz + 1,871,065,231 instructions # 2.27 insn per cycle + 0.298923642 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3400) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.488254e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.923976e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.923976e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.389655 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 750,952,234 cycles # 1.909 GHz + 1,350,104,124 instructions # 1.80 insn per cycle + 0.394048329 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 24) (512z: 2435) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -ERROR! C++ calculation (C++/GPU) failed +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491576758442 +Relative difference = 1.1066920862943416e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index 6daa4befac..67763acaac 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,53 +1,258 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-18_17:18:16 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:20:13 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault - 755,655,034 cycles:u # 2.280 GHz (75.03%) - 2,296,361 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.95%) - 4,893,982 stalled-cycles-backend:u # 0.65% backend cycles idle (75.09%) - 1,244,581,285 instructions:u # 1.65 insn per cycle - # 0.00 stalled cycles per insn (76.03%) - 0.373528752 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.836197e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.010594e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.149037e+07 ) sec^-1 
+MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.455895 sec +INFO: No Floating Point Exceptions have been reported + 1,937,517,385 cycles # 2.882 GHz + 2,695,733,072 instructions # 1.39 insn per cycle + 0.731352438 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 5.416288e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.368760e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.717095e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 +TOTAL : 0.494521 sec +INFO: No Floating Point Exceptions have been reported + 2,101,577,521 cycles # 2.872 GHz + 2,967,805,317 instructions # 1.41 insn per cycle + 0.790688389 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247487904286338 +Relative difference = 0.0003670698531228044 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe: Segmentation fault - 916,714,792 cycles:u # 2.232 GHz (74.77%) - 2,420,584 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.50%) - 9,755,195 stalled-cycles-backend:u # 1.06% backend cycles idle (71.57%) - 1,510,333,163 instructions:u # 1.65 insn per cycle - # 0.01 stalled cycles per insn (73.48%) - 0.464608691 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.109320e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.134422e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.134422e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.495018 sec +INFO: No Floating Point Exceptions have been reported + 4,406,318,830 cycles # 2.941 GHz + 12,927,562,871 instructions # 2.93 insn per cycle + 1.499121241 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 630) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246861273719524 +Relative difference = 8.940352641194861e-08 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.896108e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.081157e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.081157e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.582933 sec +INFO: No Floating Point Exceptions have been reported + 1,729,684,566 cycles # 2.949 GHz + 4,536,959,704 instructions # 2.62 insn per cycle + 0.587227353 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3611) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246862329122401 +Relative difference = 1.6348320966878032e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.671417e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.388788e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.388788e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.306400 sec +INFO: No Floating Point Exceptions have been reported + 861,419,707 cycles # 2.779 GHz + 1,914,521,871 instructions # 2.22 insn per cycle + 0.310539350 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3549) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -ERROR! C++ calculation (C++/GPU) failed +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.063623e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.871376e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.871376e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.288149 sec +INFO: No Floating Point Exceptions have been reported + 805,096,427 cycles # 2.760 GHz + 1,830,123,182 instructions # 2.27 insn per cycle + 0.292238886 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.516684e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.964575e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.964575e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.382571 sec +INFO: No Floating Point Exceptions have been reported + 732,988,918 cycles # 1.898 GHz + 1,306,469,020 instructions # 1.78 insn per cycle + 0.386957442 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1928) (512y: 24) (512z: 2435) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491576758442 +Relative difference = 1.1066920862943416e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index b27e665ecc..91e0f5565c 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,53 +1,258 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-18_17:18:18 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:20:25 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault - 745,365,703 cycles:u # 2.195 GHz (75.59%) - 2,274,468 stalled-cycles-frontend:u # 0.31% frontend cycles idle (77.60%) - 11,316,455 stalled-cycles-backend:u # 1.52% backend cycles idle (76.30%) - 1,331,170,438 instructions:u # 1.79 insn per cycle - # 0.01 stalled cycles per insn (74.12%) - 0.376623083 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.873875e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.862522e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.986643e+07 ) sec^-1 
+MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.460578 sec +INFO: No Floating Point Exceptions have been reported + 1,947,256,638 cycles # 2.867 GHz + 2,707,543,109 instructions # 1.39 insn per cycle + 0.736559227 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.023480e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.410580e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.620297e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.541253 sec +INFO: No Floating Point Exceptions have been reported + 2,242,175,675 cycles # 2.877 GHz + 3,202,804,008 instructions # 1.43 insn per cycle + 0.837177537 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe: Segmentation fault - 920,133,495 cycles:u # 2.031 GHz (76.22%) - 2,391,120 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.30%) - 8,198,586 stalled-cycles-backend:u # 0.89% backend cycles idle (73.56%) - 1,597,948,911 instructions:u # 1.74 insn per cycle - # 0.01 stalled cycles per insn (71.62%) - 0.492743549 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.056357e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.078870e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.078870e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.570709 sec +INFO: No Floating Point Exceptions have been reported + 4,639,217,468 cycles # 2.947 GHz + 13,177,906,216 instructions # 2.84 insn per cycle + 1.574828509 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 681) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.872603e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.943230e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.943230e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.894053 sec +INFO: No Floating Point Exceptions have been reported + 2,648,821,910 cycles # 2.951 GHz + 7,473,297,472 instructions # 2.82 insn per cycle + 0.898331919 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3152) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.194377e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.403567e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.403567e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.532589 sec +INFO: No Floating Point Exceptions have been reported + 1,476,927,402 cycles # 2.754 GHz + 3,127,083,010 instructions # 2.12 insn per cycle + 0.536841632 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3133) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 -ERROR! C++ calculation (C++/GPU) failed +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.590247e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.853965e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.853965e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.476058 sec +INFO: No Floating Point Exceptions have been reported + 1,323,043,261 cycles # 2.758 GHz + 2,981,146,980 instructions # 2.25 insn per cycle + 0.480339840 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 110) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.287752e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.394431e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.394431e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.737861 sec +INFO: No Floating Point Exceptions have been reported + 1,365,080,339 cycles # 1.841 GHz + 1,989,993,648 instructions # 1.46 insn per cycle + 0.742169497 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1679) (512y: 108) (512z: 2251) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 362b389de7..bc8dd367d2 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,53 +1,258 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand 
HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-18_17:18:21 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_12:20:39 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault - 754,182,859 cycles:u # 2.249 GHz (76.50%) - 2,383,388 stalled-cycles-frontend:u # 0.32% frontend cycles idle (75.24%) - 4,798,872 stalled-cycles-backend:u # 0.64% backend cycles idle (76.25%) - 1,233,149,890 instructions:u # 1.64 insn per cycle - # 0.00 stalled cycles per insn (76.10%) - 0.378348359 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.866883e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.866305e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.974692e+07 ) sec^-1 
+MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.459058 sec +INFO: No Floating Point Exceptions have been reported + 1,947,107,746 cycles # 2.878 GHz + 2,728,462,242 instructions # 1.40 insn per cycle + 0.734017841 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 3.016948e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.370809e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.575747e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.539080 sec +INFO: No Floating Point Exceptions have been reported + 2,244,664,779 cycles # 2.884 GHz + 3,243,168,469 instructions # 1.44 insn per cycle + 0.835323761 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424749e-01 +Avg ME (F77/GPU) = 0.14247482577104625 +Relative difference = 5.209967070245855e-07 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe: Segmentation fault - 1,015,084,795 cycles:u # 2.075 GHz (75.59%) - 2,599,616 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.55%) - 8,447,022 stalled-cycles-backend:u # 0.83% backend cycles idle (74.81%) - 1,501,488,691 instructions:u # 1.48 insn per cycle - # 0.01 stalled cycles per insn (73.59%) - 0.527880309 seconds time elapsed +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) +EvtsPerSec[Rmb+ME] (23) = ( 1.054078e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.076959e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.076959e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.574290 sec +INFO: No Floating Point Exceptions have been reported + 4,646,036,617 cycles # 2.945 GHz + 13,166,645,489 instructions # 2.83 insn per cycle + 1.578550564 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.873438e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.944671e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.944671e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.893595 sec +INFO: No Floating Point Exceptions have been reported + 2,639,674,089 cycles # 2.942 GHz + 7,474,954,292 instructions # 2.83 insn per cycle + 0.897961439 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3141) (avx2: 0) (512y: 0) (512z: 0) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482734618697 +Relative difference = 5.099411406595165e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.194979e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.406933e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.406933e+05 ) sec^-1 +MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.532401 sec +INFO: No Floating Point Exceptions have been reported + 1,471,043,256 cycles # 2.744 GHz + 3,127,494,333 instructions # 2.13 insn per cycle + 0.536715670 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3111) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 -ERROR! C++ calculation (C++/GPU) failed +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.604804e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.871054e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.871054e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.474052 sec +INFO: No Floating Point Exceptions have been reported + 1,321,700,799 cycles # 2.767 GHz + 2,981,907,836 instructions # 2.26 insn per cycle + 0.478334854 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2871) (512y: 110) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.246259e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.348752e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.348752e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.751218 sec +INFO: No Floating Point Exceptions have been reported + 1,373,432,632 cycles # 1.819 GHz + 1,989,927,175 instructions # 1.45 insn per cycle + 0.755614240 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1655) (512y: 108) (512z: 2251) 
+------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) +========================================================================= + +TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 95fe9024c8..6ae2d07b8c 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand 
+HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-18_19:28:53 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:35:28 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.710846e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.086431e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.101389e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.566710 sec -INFO: No Floating Point Exceptions have been reported - 984,481,609 cycles:u # 2.095 GHz (74.23%) - 2,500,193 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.70%) - 6,402,428 stalled-cycles-backend:u # 0.65% backend cycles idle 
(76.35%) - 1,516,815,030 instructions:u # 1.54 insn per cycle - # 0.00 stalled cycles per insn (76.44%) - 0.633868007 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.333836e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.844165e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.406248e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.534496 sec +INFO: No Floating Point Exceptions have been reported + 2,180,885,043 cycles # 2.827 GHz + 3,135,152,783 instructions # 1.44 insn per cycle + 0.828766444 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110271 -Relative difference = 2.1036162350152416e-07 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.021746e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.065340e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.065340e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 5.401735 sec -INFO: No Floating Point Exceptions have been reported - 16,414,406,731 cycles:u # 3.030 GHz (74.87%) - 9,369,069 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.96%) - 1,982,352,272 stalled-cycles-backend:u # 12.08% backend cycles idle (75.03%) - 51,670,056,906 instructions:u # 3.15 insn per cycle - # 0.04 stalled cycles per insn (75.05%) - 5.421856685 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 746) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.605100e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.641462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.641462e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.643809 sec +INFO: No Floating Point Exceptions have been reported + 19,303,523,142 cycles # 2.904 GHz + 51,922,542,271 instructions # 2.69 insn per cycle + 6.649309354 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.443699e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.581147e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.581147e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 3.257976 sec -INFO: No Floating Point Exceptions have been reported - 9,768,721,970 cycles:u # 2.984 GHz (74.89%) - 8,814,909 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.00%) - 3,067,265,282 stalled-cycles-backend:u # 31.40% backend cycles idle (75.08%) - 30,643,212,993 instructions:u # 3.14 insn per cycle - # 0.10 stalled cycles per insn (75.08%) - 3.278029410 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2833) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.864838e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.993187e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.993187e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.771971 sec +INFO: No Floating Point Exceptions have been reported + 10,899,823,947 cycles # 2.886 GHz + 30,797,169,430 instructions # 2.83 insn per cycle + 3.777469678 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2915) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.181020e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.606420e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.606420e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.908782 sec -INFO: No Floating Point Exceptions have been reported - 5,603,812,106 cycles:u # 2.912 GHz (74.85%) - 8,977,539 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.03%) - 1,293,319,013 stalled-cycles-backend:u # 23.08% backend cycles idle (75.06%) - 13,411,282,597 instructions:u # 2.39 insn per cycle - # 0.10 stalled cycles per insn (75.07%) - 1.929025243 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2817) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.618832e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.953390e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.953390e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.382134 sec +INFO: No Floating Point Exceptions have been reported + 6,463,553,394 cycles # 2.708 GHz + 13,666,010,364 instructions # 2.11 insn per cycle + 2.387555326 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2941) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.007992e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 5.398956e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.398956e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.205089 sec +INFO: No Floating Point Exceptions have been reported + 5,947,846,964 cycles # 2.692 GHz + 13,006,222,979 instructions # 2.19 insn per cycle + 2.210472243 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2667) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.325208e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.493799e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.493799e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.265150 sec +INFO: No Floating Point Exceptions have been reported + 5,846,999,066 cycles # 1.789 GHz + 8,588,678,582 instructions # 1.47 insn per cycle + 3.271052301 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1506) (512y: 128) (512z: 1946) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index 29ed63a3ea..a09eaeb7bd 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-18_19:29:07 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:35:55 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.726607e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.134440e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.150044e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.438497 sec -INFO: No Floating Point Exceptions have been reported - 972,883,385 cycles:u # 2.087 GHz (76.20%) - 2,368,661 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.99%) - 11,494,612 stalled-cycles-backend:u # 1.18% backend cycles idle (75.99%) - 1,553,739,793 instructions:u # 1.60 insn per cycle - # 0.01 stalled cycles per insn (75.14%) - 0.614122810 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.270085e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.841839e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.403601e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 
1.609110e-01 ) GeV^0 +TOTAL : 0.533386 sec +INFO: No Floating Point Exceptions have been reported + 2,214,034,172 cycles # 2.879 GHz + 3,142,399,923 instructions # 1.42 insn per cycle + 0.826419344 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110271 -Relative difference = 2.1036162350152416e-07 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.159672e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.210145e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.210145e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 5.067234 sec -INFO: No Floating Point Exceptions have been reported - 15,404,137,486 cycles:u # 3.031 GHz (74.98%) - 10,569,748 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.99%) - 20,185,422 stalled-cycles-backend:u # 0.13% backend cycles idle (74.99%) - 49,937,587,239 instructions:u # 3.24 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 5.087532895 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.706120e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.746757e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.746757e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.255979 sec +INFO: No Floating Point Exceptions have been reported + 18,389,967,178 cycles # 2.937 GHz + 50,052,771,539 instructions # 2.72 insn per cycle + 6.261520945 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 626) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.573808e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.720491e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.720491e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 3.145304 sec -INFO: No Floating Point Exceptions have been reported - 9,399,582,430 cycles:u # 2.974 GHz (74.96%) - 10,449,980 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.97%) - 2,379,246,017 stalled-cycles-backend:u # 25.31% backend cycles idle (74.94%) - 29,294,986,128 instructions:u # 3.12 insn per cycle - # 0.08 stalled cycles per insn (74.94%) - 3.196349391 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2625) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.086242e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.232589e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.232589e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.507867 sec +INFO: No Floating Point Exceptions have been reported + 10,373,977,217 cycles # 2.954 GHz + 29,174,589,795 instructions # 2.81 insn per cycle + 3.513510894 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2733) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.345754e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.652728e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.652728e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.170411 sec -INFO: No Floating Point Exceptions have been reported - 6,455,524,959 cycles:u # 2.953 GHz (74.75%) - 9,364,879 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.94%) - 2,030,379,431 stalled-cycles-backend:u # 31.45% backend cycles idle (75.12%) - 15,173,978,080 instructions:u # 2.35 insn per cycle - # 0.13 stalled cycles per insn (75.12%) - 2.190787664 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3011) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.355224e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.644479e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.644479e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.518598 sec +INFO: No Floating Point Exceptions have been reported + 6,982,239,473 cycles # 2.767 GHz + 15,149,066,703 instructions # 2.17 insn per cycle + 2.524208385 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3020) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.542431e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.862341e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.862341e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.419194 sec +INFO: No Floating Point Exceptions have been reported + 6,707,959,962 cycles # 2.767 GHz + 14,619,001,595 instructions # 2.18 insn per cycle + 2.424680502 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2621) (512y: 302) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.289276e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.449465e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.449465e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.297038 sec +INFO: No Floating Point Exceptions have been reported + 6,083,017,370 cycles # 1.843 GHz + 10,339,705,857 instructions # 1.70 insn per cycle + 3.302657897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 214) (512z: 2129) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 4971001236..50a3de8673 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-18_19:29:22 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:36:21 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.361472e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.486858e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.524263e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.834176e+00 +- 1.462500e-01 ) GeV^0 -TOTAL : 0.380721 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.744477e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.525834e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.617286e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 +TOTAL : 0.492808 sec INFO: No Floating Point Exceptions have been reported - 841,371,168 cycles:u # 2.087 GHz (73.47%) - 2,458,726 stalled-cycles-frontend:u # 0.29% frontend cycles idle (73.18%) - 8,058,496 stalled-cycles-backend:u # 0.96% backend cycles idle (74.42%) - 1,505,744,040 instructions:u # 1.79 insn per cycle - # 0.01 stalled cycles per insn 
(75.25%) - 0.440938607 seconds time elapsed + 2,047,170,412 cycles # 2.828 GHz + 2,929,586,090 instructions # 1.43 insn per cycle + 0.781904908 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 131 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 4.313524e+00 -Avg ME (F77/GPU) = 4.3135525361867622 -Relative difference = 6.615515935930387e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695491848513 +Relative difference = 4.162503792787837e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.247195e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.301171e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.301171e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 4.834772 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 14,778,482,520 cycles:u # 3.051 GHz (74.88%) - 17,012,751 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.91%) - 2,657,130,065 stalled-cycles-backend:u # 17.98% backend cycles idle (74.99%) - 51,520,305,643 instructions:u # 3.49 insn per cycle - # 0.05 stalled 
cycles per insn (75.07%) - 4.848724315 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 723) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.678996e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.720654e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.720654e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 6.333448 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 18,607,993,167 cycles # 2.936 GHz + 51,216,519,035 instructions # 2.75 insn per cycle + 6.339213853 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 625) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -86,36 +104,33 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135737704578787 -Relative difference = 5.321390598852464e-08 +Avg ME (F77/C++) = 4.3135738277342170 +Relative difference = 3.9935743068669333e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.124807e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.418890e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.418890e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 2.214214 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,715,567,089 cycles:u # 3.020 GHz (74.75%) - 11,820,134 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.84%) - 2,607,837,420 stalled-cycles-backend:u # 38.83% backend cycles idle (74.92%) - 18,638,250,240 instructions:u # 2.78 insn per cycle - # 0.14 stalled cycles per insn (75.11%) - 2.227937542 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3319) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.022786e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.287209e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.287209e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.694054 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 7,934,623,764 cycles # 2.940 GHz + 19,316,417,604 instructions # 2.43 insn per cycle + 2.699461082 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3542) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -123,36 +138,33 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313573e+00 -Avg ME (F77/C++) = 4.3135733226081356 -Relative difference = 7.478907526568244e-08 +Avg ME (C++/C++) = 4.313572e+00 +Avg ME (F77/C++) = 4.3135722697479650 +Relative difference = 6.253470796314402e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.953846e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.107187e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.107187e+06 ) sec^-1 -MeanMatrixElemValue = ( 7.289197e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.221386 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,581,474,569 cycles:u # 2.910 GHz (74.72%) - 7,444,068 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.69%) - 1,125,447,586 stalled-cycles-backend:u # 31.42% backend cycles idle (74.99%) - 8,605,995,544 instructions:u # 2.40 insn per cycle - # 0.13 stalled cycles per insn (75.31%) - 1.235285796 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3600) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.880495e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.877642e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.877642e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.418247 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,951,478,174 cycles # 2.777 GHz + 8,833,281,557 instructions # 2.24 insn per cycle + 1.423672827 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3715) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -160,16 +172,78 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135650658514351 -Relative difference = 1.526612799754012e-08 +Avg ME (F77/C++) = 4.3135645242873579 +Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.368251e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.499225e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.499225e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.339980 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,727,978,138 cycles # 2.773 GHz + 8,431,050,226 instructions # 2.26 insn per cycle + 1.345489073 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3541) (512y: 20) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313565e+00 +Avg ME (F77/C++) = 4.3135645242873579 +Relative difference = 1.1028294269894893e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.964882e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.513882e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.513882e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.846291 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,506,879,162 cycles # 1.895 GHz + 
6,243,949,016 instructions # 1.78 insn per cycle + 1.851728712 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2325) (512y: 22) (512z: 2290) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313564e+00 +Avg ME (F77/C++) = 4.3135643536224961 +Relative difference = 8.197919301304478e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index 329c69ad81..2b5536237c 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt 
@@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-18_19:29:34 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:36:43 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.516700e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.752203e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 4.792287e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.834176e+00 +- 1.462500e-01 ) GeV^0 -TOTAL : 0.379886 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.958341e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.585012e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.687226e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 +TOTAL : 0.488902 sec INFO: No Floating Point Exceptions have been reported - 879,908,839 cycles:u # 2.184 GHz (73.86%) - 2,356,817 stalled-cycles-frontend:u # 0.27% frontend cycles idle (73.26%) - 7,801,515 stalled-cycles-backend:u # 0.89% backend cycles idle (73.22%) - 1,507,876,997 instructions:u # 1.71 insn per cycle - # 0.01 stalled cycles per insn (76.16%) - 0.441954457 seconds time elapsed + 2,054,269,206 cycles # 2.862 GHz + 2,934,748,812 instructions # 1.43 insn per cycle + 0.774105073 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 4.313524e+00 -Avg ME (F77/GPU) = 4.3135525361867622 -Relative difference = 6.615515935930387e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695491848513 +Relative difference = 4.162503792787837e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.413549e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.475820e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.475820e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 4.512885 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 13,834,776,140 cycles:u # 3.059 GHz (74.91%) - 17,954,574 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.92%) - 294,659,305 stalled-cycles-backend:u # 2.13% backend cycles idle (75.01%) - 49,452,636,042 instructions:u # 3.57 insn per cycle - # 0.01 stalled cycles per insn (75.06%) - 4.526797728 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 614) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.738704e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.782579e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.782579e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 6.118201 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 18,018,613,315 cycles # 2.943 GHz + 49,602,263,054 instructions # 2.75 insn per cycle + 6.123752242 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 613) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -86,36 +104,33 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135737704578787 -Relative difference = 5.321390598852464e-08 +Avg ME (F77/C++) = 4.3135738277342170 +Relative difference = 3.9935743068669333e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.029890e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.445427e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.445427e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 1.906181 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,696,213,504 cycles:u # 2.973 GHz (74.95%) - 11,922,190 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.95%) - 1,744,482,482 stalled-cycles-backend:u # 30.63% backend cycles idle (74.99%) - 18,249,984,554 instructions:u # 3.20 insn per cycle - # 0.10 stalled cycles per insn (74.99%) - 1.920527841 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3078) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.513439e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.846420e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.846420e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.410664 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 7,118,641,278 cycles # 2.947 GHz + 18,533,207,759 instructions # 2.60 insn per cycle + 2.416130283 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3252) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -123,36 +138,33 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313573e+00 -Avg ME (F77/C++) = 4.3135733226081356 -Relative difference = 7.478907526568244e-08 +Avg ME (C++/C++) = 4.313572e+00 +Avg ME (F77/C++) = 4.3135722697479650 +Relative difference = 6.253470796314402e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.422216e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.025606e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.025606e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.289197e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.579219 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,687,540,598 cycles:u # 2.951 GHz (74.87%) - 8,469,245 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.83%) - 1,829,658,675 stalled-cycles-backend:u # 39.03% backend cycles idle (74.87%) - 10,816,107,004 instructions:u # 2.31 insn per cycle - # 0.17 stalled cycles per insn (74.87%) - 1.593000379 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4259) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.337179e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.778552e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.778552e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 2.052609 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 5,666,208,381 cycles # 2.754 GHz + 10,850,402,094 instructions # 1.91 insn per cycle + 2.057862471 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4274) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -160,18 +172,82 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135650658514351 -Relative difference = 1.526612799754012e-08 +Avg ME (F77/C++) = 4.3135645242873579 +Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.416639e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.866517e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.866517e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 2.022314 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 5,555,880,143 cycles # 2.741 GHz + 10,551,186,314 instructions # 1.90 insn per cycle + 2.027927255 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4138) (512y: 12) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313565e+00 +Avg ME (F77/C++) = 4.3135645242873579 +Relative difference = 1.1028294269894893e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.322863e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.603781e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.603781e+05 ) 
sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 2.514102 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 4,668,008,181 cycles # 1.854 GHz + 8,659,615,849 instructions # 1.86 insn per cycle + 2.519706497 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 0) (512z: 2885) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313564e+00 +Avg ME (F77/C++) = 4.3135643536224961 +Relative difference = 8.197919301304478e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index f201fc1612..3c9a7750d0 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-18_19:29:46 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:37:07 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.708542e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.083156e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.097984e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.442135 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.259037e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.833623e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.391198e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.533343 sec INFO: No Floating Point Exceptions have been reported - 1,032,232,511 cycles:u # 2.193 GHz (73.99%) - 2,557,831 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.54%) - 7,206,774 stalled-cycles-backend:u # 0.70% backend cycles idle (74.51%) - 1,625,877,587 instructions:u # 1.58 insn per cycle - # 0.00 stalled cycles per insn 
(75.28%) - 0.509858346 seconds time elapsed + 2,205,791,107 cycles # 2.867 GHz + 3,166,074,888 instructions # 1.44 insn per cycle + 0.826367468 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME 
(F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.933291e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.972913e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.972913e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 5.636784 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 17,161,131,724 cycles:u # 3.036 GHz (74.96%) - 35,183,856 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.95%) - 2,366,266,520 stalled-cycles-backend:u # 13.79% backend cycles idle (74.95%) - 51,682,146,779 instructions:u # 3.01 insn per cycle - # 0.05 stalled cycles per insn (75.01%) - 5.657081378 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.526469e+05 ) 
sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.558963e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.558963e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.979629 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 20,509,216,850 cycles # 2.937 GHz + 51,923,869,243 instructions # 2.53 insn per cycle + 6.985125737 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -86,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -95,27 +113,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.443515e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.580774e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.580774e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 3.257345 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 9,805,457,186 cycles:u # 2.996 GHz (74.89%) - 15,135,275 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.01%) - 3,040,971,113 stalled-cycles-backend:u # 31.01% backend cycles idle (75.07%) - 30,521,333,927 instructions:u # 3.11 insn per cycle - # 0.10 stalled cycles per insn (75.07%) - 
3.277508059 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2927) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.719239e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.833565e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.833565e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.966787 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 11,507,632,981 cycles # 2.897 GHz + 30,592,941,946 instructions # 2.66 insn per cycle + 3.972658763 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2972) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -123,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -132,27 +147,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.320550e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.767220e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.767220e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.871561 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,482,068,682 cycles:u # 2.906 GHz (74.99%) - 13,194,340 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.99%) - 1,326,892,594 stalled-cycles-backend:u # 24.20% backend cycles idle (75.02%) - 13,323,379,955 instructions:u # 2.43 insn per cycle - # 0.10 stalled cycles per insn (75.03%) - 1.891317443 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3019) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.525746e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.838241e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.838241e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.427006 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,694,021,096 cycles # 2.753 GHz + 13,606,483,540 instructions # 2.03 insn per cycle + 2.432521216 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3118) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -160,8 +172,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -169,9 +181,73 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.956630e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.333349e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.333349e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.225975 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,165,401,380 cycles # 2.764 GHz + 12,974,481,027 instructions # 2.10 insn per cycle + 2.231658259 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2851) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.095455e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.237519e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.237519e+05 ) 
sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.497825 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,439,450,147 cycles # 1.839 GHz + 8,701,510,932 instructions # 1.35 insn per cycle + 3.503267717 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1792) (512y: 130) (512z: 2014) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index c22be99f5f..008d0a9d35 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-18_19:30:01 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:37:34 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.719205e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.142967e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.158724e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.432342 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.252482e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.819370e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.388849e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.535514 sec INFO: No Floating Point Exceptions have been reported - 968,936,180 cycles:u # 2.116 GHz (74.90%) - 2,476,132 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.64%) - 5,655,333 stalled-cycles-backend:u # 0.58% backend cycles idle (75.65%) - 1,567,897,246 instructions:u # 1.62 insn per cycle - # 0.00 stalled cycles per insn 
(73.82%) - 0.495612886 seconds time elapsed + 2,214,143,603 cycles # 2.876 GHz + 3,159,539,235 instructions # 1.43 insn per cycle + 0.828878265 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME 
(F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.136897e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.186073e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.186073e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 5.117671 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 15,609,174,700 cycles:u # 3.041 GHz (74.91%) - 32,659,892 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.98%) - 59,265,580 stalled-cycles-backend:u # 0.38% backend cycles idle (75.06%) - 49,816,715,411 instructions:u # 3.19 insn per cycle - # 0.00 stalled cycles per insn (75.06%) - 5.137159828 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 652) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.608272e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.644385e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.644385e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.629223 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 19,498,919,287 cycles # 2.939 GHz + 49,953,158,127 instructions # 2.56 insn per cycle + 6.634747708 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 599) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -86,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 
64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -95,27 +113,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.526138e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.669586e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.669586e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 3.185940 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 9,543,011,529 cycles:u # 2.981 GHz (75.01%) - 15,702,421 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.01%) - 1,892,189,851 stalled-cycles-backend:u # 19.83% backend cycles idle (75.02%) - 29,001,613,634 instructions:u # 3.04 insn per cycle - # 0.07 stalled cycles per insn (75.03%) - 3.205548627 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2723) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) 
= ( 2.887478e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.016037e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.016037e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.741933 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 11,045,759,193 cycles # 2.948 GHz + 29,138,468,069 instructions # 2.64 insn per cycle + 3.747566884 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2815) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -123,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -132,27 +147,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.204840e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.502191e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.502191e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.226867 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,562,119,728 cycles:u # 2.926 GHz (75.04%) - 19,235,879 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.03%) - 2,242,235,753 stalled-cycles-backend:u # 34.17% backend cycles idle (75.03%) - 15,061,934,031 instructions:u # 2.30 insn per cycle - # 0.15 stalled cycles per insn (74.89%) - 
2.246971422 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.735821e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.946793e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.946793e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.917806 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 8,064,126,572 cycles # 2.759 GHz + 15,188,166,070 instructions # 1.88 insn per cycle + 2.923408860 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3203) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -160,8 +172,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -169,9 +181,73 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.934941e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.167424e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.167424e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.773801 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 7,685,843,393 cycles # 2.766 GHz + 14,482,526,269 instructions # 1.88 insn per cycle + 2.779397074 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2775) (512y: 304) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.028557e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.163339e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.163339e+05 ) 
sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.571755 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,530,752,454 cycles # 1.826 GHz + 9,894,967,129 instructions # 1.52 insn per cycle + 3.577461945 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1565) (512y: 216) (512z: 2216) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 2c146f6b15..052ae7ee83 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-18_19:28:17 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:34:22 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.668468e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.143595e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.145469e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.439278 sec -INFO: No Floating Point Exceptions have been reported - 1,037,942,369 cycles:u # 2.354 GHz (75.69%) - 2,302,219 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.65%) - 5,992,381 stalled-cycles-backend:u # 0.58% backend cycles idle (75.86%) - 1,587,348,327 instructions:u # 1.53 insn per cycle - # 0.00 stalled cycles per insn (74.08%) - 0.494244162 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.764082e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.781890e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) 
= ( 2.785142e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.472813 sec +INFO: No Floating Point Exceptions have been reported + 1,988,958,737 cycles # 2.864 GHz + 2,937,434,860 instructions # 1.48 insn per cycle + 0.752740146 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.119449e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.268041e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.268542e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.450274 sec -INFO: No Floating Point Exceptions have been reported - 1,125,286,471 cycles:u # 2.408 GHz (74.88%) - 2,461,221 stalled-cycles-frontend:u # 0.22% frontend cycles idle (76.05%) - 10,321,273 stalled-cycles-backend:u # 0.92% backend cycles idle 
(75.21%) - 1,590,720,369 instructions:u # 1.41 insn per cycle - # 0.01 stalled cycles per insn (74.40%) - 0.505235841 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.003017e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.119483e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.127951e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.489631 sec +INFO: No Floating Point Exceptions have been reported + 2,045,084,255 cycles # 2.869 GHz + 3,023,069,261 instructions # 1.48 insn per cycle + 0.771335484 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176587E-006 -Relative difference = 3.3392753387325367e-07 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.512477e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.517340e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.517340e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.118985 sec -INFO: No Floating Point Exceptions have been reported - 363,156,268 cycles:u # 2.984 GHz (73.84%) - 34,260 stalled-cycles-frontend:u # 0.01% frontend cycles idle (73.74%) - 40,619,768 stalled-cycles-backend:u # 11.19% backend cycles idle (73.74%) - 1,331,399,379 instructions:u # 3.67 insn per cycle - # 0.03 stalled cycles per insn (73.74%) - 0.125991835 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1627) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.395968e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.399199e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.399199e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.157638 sec +INFO: No Floating Point Exceptions have been reported + 469,190,775 cycles # 2.915 GHz + 1,389,792,831 instructions # 2.96 insn per cycle + 0.161480291 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3908) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.791838e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.809548e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.809548e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.062074 sec -INFO: No Floating Point Exceptions have been reported - 194,932,402 cycles:u # 3.011 GHz (75.11%) - 30,067 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.35%) - 22,522,994 stalled-cycles-backend:u # 11.55% backend cycles idle (75.35%) - 668,460,952 instructions:u # 3.43 insn per cycle - # 0.03 stalled cycles per insn (75.35%) - 0.069048897 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 8749) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.497864e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.511372e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.511372e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.083349 sec +INFO: No Floating Point Exceptions have been reported + 241,222,273 cycles # 2.780 GHz + 693,002,253 instructions # 2.87 insn per cycle + 0.087370180 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9482) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.870399e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.878181e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.878181e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.030271 sec -INFO: No Floating Point Exceptions have been reported - 89,416,327 cycles:u # 2.716 GHz (76.20%) - 71,315 stalled-cycles-frontend:u # 0.08% frontend cycles idle (75.83%) - 11,520,109 stalled-cycles-backend:u # 12.88% backend cycles idle (75.83%) - 238,428,202 instructions:u # 2.67 insn per cycle - # 0.05 stalled cycles per insn (75.82%) - 0.037318574 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7869) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.431164e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.437397e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.437397e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.038955 sec +INFO: No Floating Point Exceptions have been reported + 115,308,474 cycles # 2.709 GHz + 257,920,071 instructions # 2.24 insn per cycle + 0.043236547 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8501) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.580017e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.587312e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.587312e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.035473 sec +INFO: No Floating Point Exceptions have been reported + 102,969,893 cycles # 2.655 GHz + 240,051,517 instructions # 2.33 insn per cycle + 0.039391596 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8143) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.194413e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.199659e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.199659e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 
) GeV^-4 +TOTAL : 0.046450 sec +INFO: No Floating Point Exceptions have been reported + 90,344,224 cycles # 1.811 GHz + 134,320,028 instructions # 1.49 insn per cycle + 0.050486009 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1943) (512y: 126) (512z: 7086) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index 399355ad74..a192f75604 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-18_19:28:23 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:34:33 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.984818e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.488022e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.490089e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.418542 sec -INFO: No Floating Point Exceptions have been reported - 1,027,313,403 cycles:u # 2.363 GHz (75.92%) - 2,492,833 stalled-cycles-frontend:u # 0.24% frontend cycles idle (76.76%) - 5,138,161 stalled-cycles-backend:u # 0.50% backend cycles idle (76.29%) - 1,497,202,732 instructions:u # 1.46 insn per cycle - # 0.00 stalled cycles per insn (75.95%) - 0.474829105 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.801517e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.819462e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.822714e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.476112 sec +INFO: No Floating Point Exceptions have been reported + 2,002,117,199 cycles # 2.852 GHz + 2,866,160,766 instructions # 1.43 insn per cycle + 0.760468280 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.150169e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.294351e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.294874e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.447753 sec -INFO: No Floating Point Exceptions have been reported - 1,107,969,800 cycles:u # 2.380 GHz (75.88%) - 2,409,302 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.95%) - 6,691,932 stalled-cycles-backend:u # 0.60% backend cycles idle (75.98%) - 1,616,550,803 instructions:u # 1.46 insn per cycle - # 0.00 stalled cycles per insn (73.95%) - 0.503491277 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.078707e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.191865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.200143e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.489613 sec +INFO: No Floating Point Exceptions have been reported + 2,042,241,108 cycles # 2.868 GHz + 2,998,598,647 instructions # 1.47 insn per cycle + 0.772399138 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176587E-006 -Relative difference = 3.3392753387325367e-07 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe 
-p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.524239e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.529078e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.529078e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.118128 sec -INFO: No Floating Point Exceptions have been reported - 368,723,264 cycles:u # 3.053 GHz (72.37%) - 30,919 stalled-cycles-frontend:u # 0.01% frontend cycles idle (73.55%) - 47,382,382 stalled-cycles-backend:u # 12.85% backend cycles idle (73.55%) - 1,330,184,390 instructions:u # 3.61 insn per cycle - # 0.04 stalled cycles per insn (73.55%) - 0.125307087 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1597) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.407520e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.410948e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.410948e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.156563 sec +INFO: No Floating Point Exceptions have been reported + 466,584,758 cycles # 2.920 GHz + 
1,385,250,664 instructions # 2.97 insn per cycle + 0.160376464 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3796) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.773195e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.790673e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.790673e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.061655 sec -INFO: No Floating Point Exceptions have been reported - 184,751,906 cycles:u # 2.876 GHz (75.16%) - 49,149 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.16%) - 20,265,437 stalled-cycles-backend:u # 10.97% backend cycles idle (75.16%) - 666,237,753 instructions:u # 3.61 insn per cycle - # 0.03 stalled cycles per insn (75.16%) - 0.068873648 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 8794) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.449696e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.462962e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.462962e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.083015 sec +INFO: No Floating Point Exceptions have been reported + 239,636,465 cycles # 2.770 
GHz + 689,080,119 instructions # 2.88 insn per cycle + 0.087201828 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9525) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.869543e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.877173e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.877173e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.029734 sec -INFO: No Floating Point Exceptions have been reported - 86,265,079 cycles:u # 2.667 GHz (75.40%) - 21,439 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.40%) - 9,672,702 stalled-cycles-backend:u # 11.21% backend cycles idle (75.40%) - 235,809,484 instructions:u # 2.73 insn per cycle - # 0.04 stalled cycles per insn (75.40%) - 0.036725984 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7839) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.414254e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.419861e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.419861e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.038646 sec +INFO: No Floating Point Exceptions have been reported + 111,994,100 cycles # 2.669 GHz 
+ 253,518,298 instructions # 2.26 insn per cycle + 0.042520952 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8457) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.642367e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.650155e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.650155e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.033489 sec +INFO: No Floating Point Exceptions have been reported + 100,655,003 cycles # 2.733 GHz + 235,667,417 instructions # 2.34 insn per cycle + 0.037423166 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8101) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.198873e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.203973e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.203973e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 
) GeV^-4 +TOTAL : 0.045540 sec +INFO: No Floating Point Exceptions have been reported + 88,110,981 cycles # 1.799 GHz + 129,713,745 instructions # 1.47 insn per cycle + 0.049588057 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1899) (512y: 126) (512z: 7084) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 8963676ee4..0a43242226 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-18_19:28:29 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:34:44 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.154909e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.304119e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.304678e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.100225e-04 +- 2.256521e-04 ) GeV^-4 -TOTAL : 0.410468 sec -INFO: No Floating Point Exceptions have been reported - 968,020,432 cycles:u # 2.381 GHz (74.32%) - 2,566,500 stalled-cycles-frontend:u # 0.27% frontend cycles idle (73.57%) - 8,893,630 stalled-cycles-backend:u # 0.92% backend cycles idle (74.04%) - 1,495,841,709 instructions:u # 1.55 insn per cycle - # 0.01 stalled cycles per insn (75.32%) - 0.465948790 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.214942e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.224129e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.226295e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 +TOTAL : 0.475163 sec +INFO: No Floating Point Exceptions have been reported + 1,995,760,495 cycles # 2.876 GHz + 2,898,607,116 instructions # 1.45 insn per cycle + 0.751350588 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.840077e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.357410e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.358906e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.043589e-02 +- 5.707640e-02 ) GeV^-4 -TOTAL : 0.409687 sec -INFO: No Floating Point Exceptions have been reported - 1,012,721,169 cycles:u # 2.379 GHz (75.55%) - 2,506,424 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.57%) - 5,337,641 stalled-cycles-backend:u # 0.53% backend cycles idle (75.67%) - 1,521,579,474 instructions:u # 1.50 insn per cycle - # 0.00 stalled cycles per insn (73.13%) - 0.462809781 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.954269e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.031370e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.039107e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.020494e-03 +- 4.025605e-03 ) GeV^-4 +TOTAL : 0.476219 sec +INFO: No Floating Point Exceptions have been reported + 1,999,149,645 cycles # 2.878 GHz + 2,913,422,324 instructions # 1.46 insn per cycle + 0.751593441 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 8.127375e-06 -Avg ME (F77/GPU) = 8.1275160277913510E-006 -Relative difference = 1.735219444797551e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272869669930272E-006 +Relative difference = 4.548524165778887e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.665055e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.670106e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.670106e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 -TOTAL : 0.115176 sec -INFO: No Floating Point Exceptions have been reported - 349,347,512 cycles:u # 2.966 GHz (72.87%) - 36,350 stalled-cycles-frontend:u # 0.01% frontend cycles idle (72.87%) - 46,649,985 stalled-cycles-backend:u # 13.35% backend cycles idle (72.20%) - 1,343,502,173 instructions:u # 3.85 insn per cycle - # 0.03 stalled cycles per insn (75.59%) - 0.122057755 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1635) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.411294e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.414706e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.414706e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 
6.554185e-04 ) GeV^-4 +TOTAL : 0.156868 sec +INFO: No Floating Point Exceptions have been reported + 464,525,374 cycles # 2.900 GHz + 1,382,008,460 instructions # 2.98 insn per cycle + 0.160803882 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127810e-06 -Avg ME (F77/C++) = 8.1278101435899343E-006 -Relative difference = 1.76664974860306e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127811e-06 +Avg ME (F77/C++) = 8.1278105271212486E-006 +Relative difference = 5.8180333155894157e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.658219e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.664629e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.664629e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 -TOTAL : 0.033799 sec -INFO: No Floating Point Exceptions have been reported - 111,754,632 cycles:u # 3.069 GHz (66.33%) - 25,690 stalled-cycles-frontend:u # 0.02% frontend cycles idle (78.14%) - 15,926,909 stalled-cycles-backend:u # 14.25% backend cycles idle (78.13%) - 350,152,006 instructions:u # 3.13 insn per cycle - # 0.05 stalled cycles per insn (78.14%) - 0.040914810 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9270) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 
1.203598e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.208165e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.208165e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.045866 sec +INFO: No Floating Point Exceptions have been reported + 133,138,155 cycles # 2.706 GHz + 372,169,369 instructions # 2.80 insn per cycle + 0.049817482 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:10141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127807e-06 -Avg ME (F77/C++) = 8.1278071402353976E-006 -Relative difference = 1.725378052944308e-08 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127809e-06 +Avg ME (F77/C++) = 8.1278090510674588E-006 +Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.616514e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.647967e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.647967e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.016569 sec -INFO: No Floating Point Exceptions have been reported - 47,284,090 cycles:u # 2.458 GHz (61.55%) - 22,430 stalled-cycles-frontend:u # 0.05% frontend cycles idle (58.79%) - 4,344,587 stalled-cycles-backend:u # 9.19% backend cycles idle (58.78%) - 130,336,025 instructions:u # 2.76 insn per cycle - # 0.03 stalled cycles per 
insn (67.09%) - 0.023718224 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8628) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.784499e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.809977e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.809977e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.020924 sec +INFO: No Floating Point Exceptions have been reported + 65,424,959 cycles # 2.700 GHz + 142,812,066 instructions # 2.18 insn per cycle + 0.024819725 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9241) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127535e-06 -Avg ME (F77/C++) = 8.1275351122593251E-006 -Relative difference = 1.3812222848044195e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.962557e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.993097e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.993097e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.019867 sec +INFO: No Floating Point Exceptions have been reported + 60,581,334 cycles # 2.611 GHz + 132,865,474 instructions # 2.19 insn per cycle + 0.023738141 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8959) (512y: 28) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.316896e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.339579e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.339579e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 
) GeV^-4 +TOTAL : 0.024992 sec +INFO: No Floating Point Exceptions have been reported + 52,575,011 cycles # 1.850 GHz + 79,563,519 instructions # 1.51 insn per cycle + 0.029028726 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2836) (512y: 30) (512z: 7437) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275369863475849E-006 +Relative difference = 1.6797726498700304e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index 7cdd584b43..81fec428b9 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-18_19:28:35 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:34:55 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.144180e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.289182e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.289765e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.100225e-04 +- 2.256521e-04 ) GeV^-4 -TOTAL : 0.387363 sec -INFO: No Floating Point Exceptions have been reported - 957,531,938 cycles:u # 2.375 GHz (75.15%) - 2,450,003 stalled-cycles-frontend:u # 0.26% frontend cycles idle (76.17%) - 6,366,991 stalled-cycles-backend:u # 0.66% backend cycles idle (76.29%) - 1,403,761,770 instructions:u # 1.47 insn per cycle - # 0.00 stalled cycles per insn (76.27%) - 0.442190638 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.237744e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.247254e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.249233e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 +TOTAL : 0.475021 sec +INFO: No Floating Point Exceptions have been reported + 1,993,743,022 cycles # 2.872 GHz + 2,918,324,117 instructions # 1.46 insn per cycle + 0.750958800 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.815796e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.310260e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.311812e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.043589e-02 +- 5.707640e-02 ) GeV^-4 -TOTAL : 0.410025 sec -INFO: No Floating Point Exceptions have been reported - 1,018,395,407 cycles:u # 2.395 GHz (75.25%) - 2,460,070 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.36%) - 7,419,542 stalled-cycles-backend:u # 0.73% backend cycles idle (74.27%) - 1,529,000,005 instructions:u # 1.50 insn per cycle - # 0.00 stalled cycles per insn (73.41%) - 0.466391689 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.067375e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.148140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.156186e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.020496e-03 +- 4.025606e-03 ) GeV^-4 +TOTAL : 0.476461 sec +INFO: No Floating Point Exceptions have been reported + 1,993,725,610 cycles # 2.868 GHz + 2,900,779,066 instructions # 1.45 insn per cycle + 0.752726088 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 8.127375e-06 -Avg ME (F77/GPU) = 8.1275164883853706E-006 -Relative difference = 1.740886637704508e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272866419447706E-006 +Relative difference = 4.508529302013153e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.661400e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.666444e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.666444e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 -TOTAL : 0.114706 sec -INFO: No Floating Point Exceptions have been reported - 352,310,785 cycles:u # 3.003 GHz (73.30%) - 32,201 stalled-cycles-frontend:u # 0.01% frontend cycles idle (72.77%) - 39,110,027 stalled-cycles-backend:u # 11.10% backend cycles idle (72.77%) - 1,325,206,070 instructions:u # 3.76 insn per cycle - # 0.03 stalled cycles per insn (72.95%) - 0.121742553 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1608) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.438799e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.442175e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.442175e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 
6.554185e-04 ) GeV^-4 +TOTAL : 0.155006 sec +INFO: No Floating Point Exceptions have been reported + 462,147,018 cycles # 2.920 GHz + 1,376,798,562 instructions # 2.98 insn per cycle + 0.158894971 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2930) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127810e-06 -Avg ME (F77/C++) = 8.1278101435899343E-006 -Relative difference = 1.76664974860306e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127811e-06 +Avg ME (F77/C++) = 8.1278105271212486E-006 +Relative difference = 5.8180333155894157e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.663127e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.669548e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.669548e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 -TOTAL : 0.033155 sec -INFO: No Floating Point Exceptions have been reported - 107,195,720 cycles:u # 2.992 GHz (69.15%) - 20,939 stalled-cycles-frontend:u # 0.02% frontend cycles idle (77.78%) - 15,459,235 stalled-cycles-backend:u # 14.42% backend cycles idle (77.78%) - 347,025,294 instructions:u # 3.24 insn per cycle - # 0.04 stalled cycles per insn (77.78%) - 0.040164534 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9253) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 
1.224501e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.229267e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.229267e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.044280 sec +INFO: No Floating Point Exceptions have been reported + 130,643,774 cycles # 2.744 GHz + 367,253,267 instructions # 2.81 insn per cycle + 0.048214582 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:10124) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127807e-06 -Avg ME (F77/C++) = 8.1278071402353976E-006 -Relative difference = 1.725378052944308e-08 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127809e-06 +Avg ME (F77/C++) = 8.1278090510674588E-006 +Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.612156e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.642468e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.642468e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.016083 sec -INFO: No Floating Point Exceptions have been reported - 48,800,876 cycles:u # 2.613 GHz (62.04%) - 19,322 stalled-cycles-frontend:u # 0.04% frontend cycles idle (57.57%) - 4,617,313 stalled-cycles-backend:u # 9.46% backend cycles idle (57.56%) - 126,177,930 instructions:u # 2.59 insn per cycle - # 0.04 stalled cycles per 
insn (63.92%) - 0.022738865 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8595) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.785213e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.809806e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.809806e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.020207 sec +INFO: No Floating Point Exceptions have been reported + 63,247,605 cycles # 2.692 GHz + 138,006,301 instructions # 2.18 insn per cycle + 0.024065097 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9196) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127535e-06 -Avg ME (F77/C++) = 8.1275351122593251E-006 -Relative difference = 1.3812222848044195e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.053192e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.081685e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.081685e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.018541 sec +INFO: No Floating Point Exceptions have been reported + 58,249,945 cycles # 2.668 GHz + 127,981,629 instructions # 2.20 insn per cycle + 0.022408862 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8910) (512y: 28) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.336383e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.358299e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.358299e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 
) GeV^-4 +TOTAL : 0.024035 sec +INFO: No Floating Point Exceptions have been reported + 50,478,559 cycles # 1.838 GHz + 74,763,022 instructions # 1.48 insn per cycle + 0.028059996 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2791) (512y: 30) (512z: 7439) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275369863475849E-006 +Relative difference = 1.6797726498700304e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index b711462f1b..59d9b0aed3 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-18_19:28:40 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:35:06 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.661949e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.199247e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.201096e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.417312 sec -INFO: No Floating Point Exceptions have been reported - 1,062,085,104 cycles:u # 2.456 GHz (76.43%) - 2,355,484 stalled-cycles-frontend:u # 0.22% frontend cycles idle (76.05%) - 6,266,104 stalled-cycles-backend:u # 0.59% backend cycles idle (74.78%) - 1,591,646,638 instructions:u # 1.50 insn per cycle - # 0.00 stalled cycles per insn (73.98%) - 0.466599491 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.754823e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.776415e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.779447e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.471377 sec +INFO: No Floating Point Exceptions have been reported + 1,997,323,985 cycles # 2.874 GHz + 2,899,694,458 instructions # 1.45 insn per cycle + 0.752307454 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.124047e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.274352e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.274851e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.471017 sec -INFO: No Floating Point Exceptions have been reported - 1,152,786,923 cycles:u # 2.462 GHz (74.79%) - 2,473,272 stalled-cycles-frontend:u # 0.21% frontend cycles idle (76.01%) - 5,503,635 stalled-cycles-backend:u # 0.48% backend cycles idle (76.15%) - 1,578,639,124 instructions:u # 1.37 insn per cycle - # 0.00 stalled cycles per insn (76.21%) - 0.521582548 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.948061e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.061017e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.069565e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.489196 sec +INFO: No Floating Point Exceptions have been reported + 2,036,665,309 cycles # 2.870 GHz + 3,021,584,007 instructions # 1.48 insn per cycle + 0.771275990 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405183E-006 -Relative difference = 3.336909458255062e-07 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe 
-p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.550757e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.555611e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.555611e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.118022 sec -INFO: No Floating Point Exceptions have been reported - 362,163,408 cycles:u # 3.001 GHz (73.88%) - 38,253 stalled-cycles-frontend:u # 0.01% frontend cycles idle (73.52%) - 38,323,839 stalled-cycles-backend:u # 10.58% backend cycles idle (73.52%) - 1,339,885,187 instructions:u # 3.70 insn per cycle - # 0.03 stalled cycles per insn (73.52%) - 0.125046952 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1630) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.346871e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.350343e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.350343e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.160031 sec +INFO: No Floating Point Exceptions have been reported + 472,933,421 cycles # 2.893 GHz + 
1,398,381,136 instructions # 2.96 insn per cycle + 0.164085482 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.867085e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.884293e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.884293e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.061531 sec -INFO: No Floating Point Exceptions have been reported - 184,178,339 cycles:u # 2.873 GHz (75.11%) - 30,115 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.11%) - 23,284,087 stalled-cycles-backend:u # 12.64% backend cycles idle (75.11%) - 664,412,984 instructions:u # 3.61 insn per cycle - # 0.04 stalled cycles per insn (75.11%) - 0.068525162 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 8728) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.641661e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.653702e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.653702e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.081430 sec +INFO: No Floating Point Exceptions have been reported + 237,272,954 cycles # 2.797 
GHz + 688,192,491 instructions # 2.90 insn per cycle + 0.085340914 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9334) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.903962e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.912090e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.912090e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.029761 sec -INFO: No Floating Point Exceptions have been reported - 94,108,055 cycles:u # 2.906 GHz (75.81%) - 26,438 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.43%) - 10,714,661 stalled-cycles-backend:u # 11.39% backend cycles idle (75.43%) - 234,365,644 instructions:u # 2.49 insn per cycle - # 0.05 stalled cycles per insn (75.43%) - 0.036588127 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7892) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.416781e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.422580e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.422580e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.039309 sec +INFO: No Floating Point Exceptions have been reported + 114,214,565 cycles # 2.672 GHz 
+ 253,122,283 instructions # 2.22 insn per cycle + 0.043386095 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8363) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.596060e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.604256e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.604256e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.035081 sec +INFO: No Floating Point Exceptions have been reported + 101,856,642 cycles # 2.646 GHz + 233,656,157 instructions # 2.29 insn per cycle + 0.039147600 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7501) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.146549e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.151691e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.151691e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) 
GeV^-4 +TOTAL : 0.048254 sec +INFO: No Floating Point Exceptions have been reported + 91,587,165 cycles # 1.768 GHz + 133,174,500 instructions # 1.45 insn per cycle + 0.052446048 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2084) (512y: 122) (512z: 6354) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index 2b7efd8bd5..6686b30b4b 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-18_19:28:46 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:35:17 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 
12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.969887e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.486074e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.488127e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.441981 sec -INFO: No Floating Point Exceptions have been reported - 1,017,798,254 cycles:u # 2.342 GHz (76.18%) - 2,308,984 stalled-cycles-frontend:u # 0.23% frontend cycles idle (76.10%) - 6,452,844 stalled-cycles-backend:u # 0.63% backend cycles idle (75.85%) - 1,567,263,249 instructions:u # 1.54 insn per cycle - # 0.00 stalled cycles per insn (72.74%) - 0.493323757 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.784162e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.808686e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.813201e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.333667 sec +INFO: No Floating Point Exceptions have been reported + 1,240,871,647 cycles # 2.848 GHz + 2,449,109,840 instructions # 1.97 insn per cycle + 0.615625688 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.157579e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.308611e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.309138e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.467491 sec -INFO: No Floating Point Exceptions have been reported - 1,116,307,170 cycles:u # 2.398 GHz (75.15%) - 2,467,208 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.96%) - 13,793,394 stalled-cycles-backend:u # 1.24% backend cycles idle (75.35%) - 1,640,893,198 instructions:u # 1.47 insn per cycle - # 0.01 stalled cycles per insn (73.85%) - 0.524297744 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.062992e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.177330e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.186889e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.492547 sec +INFO: No Floating Point Exceptions have been reported + 2,036,892,131 cycles # 2.842 GHz + 3,016,344,751 instructions # 1.48 insn per cycle + 0.776057336 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405183E-006 -Relative difference = 3.336909458255062e-07 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe 
-p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.525460e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.530238e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.530238e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.118122 sec -INFO: No Floating Point Exceptions have been reported - 360,120,470 cycles:u # 2.981 GHz (73.64%) - 31,355 stalled-cycles-frontend:u # 0.01% frontend cycles idle (73.55%) - 44,381,360 stalled-cycles-backend:u # 12.32% backend cycles idle (73.55%) - 1,339,239,764 instructions:u # 3.72 insn per cycle - # 0.03 stalled cycles per insn (73.55%) - 0.124960214 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1603) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.396244e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.399563e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.399563e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.156900 sec +INFO: No Floating Point Exceptions have been reported + 468,878,349 cycles # 2.927 GHz + 
1,393,744,642 instructions # 2.97 insn per cycle + 0.160773641 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.937216e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.955824e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.955824e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.060516 sec -INFO: No Floating Point Exceptions have been reported - 187,016,868 cycles:u # 2.962 GHz (75.81%) - 25,537 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.73%) - 22,517,025 stalled-cycles-backend:u # 12.04% backend cycles idle (74.73%) - 661,818,506 instructions:u # 3.54 insn per cycle - # 0.03 stalled cycles per insn (74.73%) - 0.067401522 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 8787) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.703638e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.716215e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.716215e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.080002 sec +INFO: No Floating Point Exceptions have been reported + 235,588,650 cycles # 2.827 
GHz + 684,259,138 instructions # 2.90 insn per cycle + 0.083821193 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9368) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.897525e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.905614e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.905614e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.029335 sec -INFO: No Floating Point Exceptions have been reported - 85,082,223 cycles:u # 2.666 GHz (75.07%) - 26,017 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.07%) - 11,186,343 stalled-cycles-backend:u # 13.15% backend cycles idle (75.06%) - 231,761,669 instructions:u # 2.72 insn per cycle - # 0.05 stalled cycles per insn (75.07%) - 0.036167421 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7874) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.433569e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.439450e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.439450e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.038120 sec +INFO: No Floating Point Exceptions have been reported + 111,841,703 cycles # 2.696 GHz 
+ 248,650,538 instructions # 2.22 insn per cycle + 0.042017351 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8316) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.614208e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.621785e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.621785e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.034000 sec +INFO: No Floating Point Exceptions have been reported + 99,535,427 cycles # 2.668 GHz + 229,238,314 instructions # 2.30 insn per cycle + 0.037858332 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7452) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.195361e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.200436e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.200436e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) 
GeV^-4 +TOTAL : 0.045690 sec +INFO: No Floating Point Exceptions have been reported + 89,777,680 cycles # 1.821 GHz + 128,604,385 instructions # 1.43 insn per cycle + 0.049950768 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2035) (512y: 122) (512z: 6355) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index dc1aa58764..62aa2351ef 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-18_19:27:38 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:33:10 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.263984e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.167835e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.215993e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 -TOTAL : 0.402908 sec -INFO: No Floating Point Exceptions have been reported - 790,493,625 cycles:u # 1.906 GHz (74.68%) - 2,335,099 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.43%) - 6,129,545 stalled-cycles-backend:u # 0.78% backend cycles idle (76.53%) - 1,422,912,336 instructions:u # 1.80 insn per cycle - # 0.00 stalled cycles per insn (74.72%) - 0.465147442 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.107848e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.349751e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.801252e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.525011 sec +INFO: No Floating Point Exceptions have been reported + 2,191,922,668 cycles # 2.881 GHz + 3,124,854,662 instructions # 1.43 insn per cycle + 0.820527123 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 Relative difference = 2.590743366698123e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.038360e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166278e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.166278e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 1.225705 sec -INFO: No Floating Point Exceptions have been reported - 3,452,727,402 cycles:u # 2.783 GHz (75.04%) - 8,530,310 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.88%) - 12,832,682 stalled-cycles-backend:u # 0.37% backend cycles idle (74.91%) - 9,634,837,721 instructions:u # 2.79 insn per cycle - # 0.00 stalled cycles per insn (74.92%) - 1.245159412 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 332) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.117531e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.040993e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.040993e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.263727 sec +INFO: No Floating Point Exceptions have been reported + 3,735,375,700 cycles # 2.944 GHz + 9,727,971,651 instructions # 2.60 insn per cycle + 1.269703149 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.946379e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.485940e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.485940e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.754874 sec -INFO: No Floating Point Exceptions have been reported - 1,995,700,116 cycles:u # 2.590 GHz (75.09%) - 8,309,863 stalled-cycles-frontend:u # 0.42% frontend cycles idle (75.09%) - 9,001,605 stalled-cycles-backend:u # 0.45% backend cycles idle (75.09%) - 5,915,981,455 instructions:u # 2.96 insn per cycle - # 0.00 stalled cycles per insn (75.19%) - 0.775038093 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1321) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.512691e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.947484e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.947484e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.807869 sec +INFO: No Floating Point Exceptions have been reported + 2,332,400,363 cycles # 2.869 GHz + 5,932,883,831 instructions # 2.54 insn per cycle + 0.813712795 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1369) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.023188e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.415258e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.415258e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.561936 sec -INFO: No Floating Point Exceptions have been reported - 1,401,006,195 cycles:u # 2.425 GHz (75.10%) - 8,413,066 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.08%) - 18,327,903 stalled-cycles-backend:u # 1.31% backend cycles idle (75.08%) - 3,284,856,607 instructions:u # 2.34 insn per cycle - # 0.01 stalled cycles per insn (75.20%) - 0.582102691 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1468) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.185960e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.183533e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.183533e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.594661 sec +INFO: No Floating Point Exceptions have been reported + 1,663,371,411 cycles # 2.773 GHz + 3,314,486,720 instructions # 1.99 insn per cycle + 0.600516021 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1499) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] 
(23) = ( 2.219367e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.251513e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.251513e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.587632 sec +INFO: No Floating Point Exceptions have been reported + 1,614,839,496 cycles # 2.724 GHz + 3,284,546,277 instructions # 2.03 insn per cycle + 0.593339482 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1375) (512y: 96) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.129616e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.055946e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.055946e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.606967 sec +INFO: No Floating Point Exceptions have been reported + 1,366,903,692 cycles # 2.234 GHz + 2,424,948,880 instructions # 1.77 insn per cycle + 0.612713832 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 580) (512y: 60) (512z: 1021) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index 4a0aff8aa2..239bb47b8a 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-18_19:27:45 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:33:22 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.301680e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.173188e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.222396e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 -TOTAL : 0.384632 sec -INFO: No Floating Point Exceptions have been reported - 846,707,623 cycles:u # 2.055 GHz (74.90%) - 2,390,529 stalled-cycles-frontend:u # 0.28% frontend cycles idle (76.43%) - 7,195,648 
stalled-cycles-backend:u # 0.85% backend cycles idle (75.75%) - 1,429,395,666 instructions:u # 1.69 insn per cycle - # 0.01 stalled cycles per insn (74.83%) - 0.449705601 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.181260e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.490249e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.991797e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.525256 sec +INFO: No Floating Point Exceptions have been reported + 2,186,851,153 cycles # 2.864 GHz + 3,107,286,620 instructions # 1.42 insn per cycle + 0.822741231 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 Relative difference = 2.590743366698123e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.037912e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.165841e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.165841e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 1.224624 sec -INFO: No Floating Point Exceptions have been reported - 3,447,640,527 cycles:u # 2.779 GHz (74.92%) - 9,115,739 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.91%) - 13,796,381 stalled-cycles-backend:u # 0.40% backend cycles idle (74.91%) - 9,579,016,894 instructions:u # 2.78 insn per cycle - # 0.00 stalled cycles per insn (74.86%) - 1.245115418 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 342) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.043560e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.033362e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.033362e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.273177 sec +INFO: No Floating Point Exceptions have been reported + 3,715,871,529 cycles # 2.906 GHz + 9,610,590,320 instructions # 2.59 insn per cycle + 1.279195540 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.983369e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.535409e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.535409e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.740665 sec -INFO: No Floating Point Exceptions have been reported - 2,003,614,020 cycles:u # 2.649 GHz (74.73%) - 8,694,148 stalled-cycles-frontend:u # 0.43% frontend cycles idle (74.64%) - 10,670,384 stalled-cycles-backend:u # 0.53% backend cycles idle (74.62%) - 5,823,125,655 instructions:u # 2.91 insn per cycle - # 0.00 stalled cycles per insn (75.02%) - 0.760836257 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1295) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.470593e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.877997e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.877997e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.826368 sec +INFO: No Floating Point Exceptions have been reported + 2,333,894,912 cycles # 2.807 GHz + 5,878,357,831 instructions # 2.52 insn per cycle + 0.832251124 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1340) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.015881e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.401925e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.401925e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.562949 sec -INFO: No Floating Point Exceptions have been reported - 1,411,540,720 cycles:u # 2.439 GHz (74.64%) - 8,118,887 stalled-cycles-frontend:u # 0.58% frontend cycles idle (75.12%) - 13,907,036 stalled-cycles-backend:u # 0.99% backend cycles idle (75.13%) - 3,244,629,671 instructions:u # 2.30 insn per cycle - # 0.00 stalled cycles per insn (75.13%) - 0.583279083 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1418) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.242144e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.308218e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.308218e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.580800 sec +INFO: No Floating Point Exceptions have been reported + 1,655,777,920 cycles # 2.827 GHz + 3,287,720,584 instructions # 1.99 insn per cycle + 0.586391271 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1436) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] 
(23) = ( 2.289151e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.391409e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.391409e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.570490 sec +INFO: No Floating Point Exceptions have been reported + 1,622,799,576 cycles # 2.819 GHz + 3,260,934,090 instructions # 2.01 insn per cycle + 0.576408659 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1328) (512y: 96) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.147175e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.094895e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.094895e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.601115 sec +INFO: No Floating Point Exceptions have been reported + 1,376,859,663 cycles # 2.272 GHz + 2,409,979,343 instructions # 1.75 insn per cycle + 0.607114374 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 547) (512y: 60) (512z: 1007) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index f6758d0f37..d290e84a6a 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-18_19:27:51 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:33:34 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.769393e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.430907e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.483413e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.485983e-01 +- 3.276854e-05 ) GeV^0 -TOTAL : 0.524863 sec -INFO: No Floating Point Exceptions have been reported - 1,237,747,185 cycles:u # 2.253 GHz (75.00%) - 3,461,109 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.64%) - 17,464,996 
stalled-cycles-backend:u # 1.41% backend cycles idle (75.10%) - 1,400,351,263 instructions:u # 1.13 insn per cycle - # 0.01 stalled cycles per insn (74.71%) - 0.590966926 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.032821e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.078089e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.480611e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 +TOTAL : 0.485441 sec +INFO: No Floating Point Exceptions have been reported + 2,051,454,700 cycles # 2.873 GHz + 2,936,249,934 instructions # 1.43 insn per cycle + 0.771058253 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 97 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477195e-01 -Avg ME (F77/GPU) = 0.14771957969060168 -Relative difference = 5.394724574150425e-07 +Avg ME (F77/GPU) = 0.14771956735057756 +Relative difference = 4.559355911674916e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.214791e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.389631e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.389631e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283184e-05 ) GeV^0 -TOTAL : 1.030266 sec -INFO: No Floating Point Exceptions have been reported - 2,979,702,508 cycles:u # 2.865 GHz (74.72%) - 6,889,472 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.69%) - 5,380,573 stalled-cycles-backend:u # 0.18% backend cycles idle (74.65%) - 9,455,387,314 instructions:u # 3.17 insn per cycle - # 0.00 stalled cycles per insn (75.02%) - 1.044658171 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 432) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.100530e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.045913e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.045913e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 1.244009 sec +INFO: No Floating Point Exceptions have been reported + 3,662,603,595 cycles # 2.932 GHz + 9,601,734,780 instructions # 2.62 insn per cycle + 1.249887433 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.120124e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.665555e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.665555e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283183e-05 ) GeV^0 -TOTAL : 0.506645 sec -INFO: No Floating Point Exceptions have been reported - 1,377,549,413 cycles:u # 2.666 GHz (74.94%) - 6,765,579 stalled-cycles-frontend:u # 0.49% frontend cycles idle (75.25%) - 18,960,448 stalled-cycles-backend:u # 1.38% backend cycles idle (75.25%) - 3,820,223,249 instructions:u # 2.77 insn per cycle - # 0.00 stalled cycles per insn (75.24%) - 0.521032641 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1513) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.260293e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.450195e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.450195e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 0.554122 sec +INFO: No Floating Point Exceptions have been reported + 1,637,956,120 cycles # 2.928 GHz + 3,967,181,530 instructions # 2.42 insn per cycle + 0.560033790 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1579) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955448668450 -Relative difference = 3.081061382869002e-07 +Avg ME (F77/C++) = 0.14771955861942843 +Relative difference = 2.80129187869649e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.107571e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.074889e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.074889e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283177e-05 ) GeV^0 -TOTAL : 0.425809 sec -INFO: No Floating Point Exceptions have been reported - 1,107,075,808 cycles:u # 2.540 GHz (74.47%) - 5,577,417 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.49%) - 9,015,200 stalled-cycles-backend:u # 0.81% backend cycles idle (74.47%) - 2,411,169,385 instructions:u # 2.18 insn per cycle - # 0.00 stalled cycles per insn (75.37%) - 0.440226538 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1876) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.018941e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.312758e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.312758e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.436114 sec +INFO: No Floating Point Exceptions have been reported + 1,253,193,980 cycles # 2.841 GHz + 2,497,513,333 instructions # 1.99 insn per cycle + 0.441707702 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1924) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955128526315 -Relative difference = 3.2977842382139064e-07 +Avg ME (F77/C++) = 0.14771955698961392 +Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal 
loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.105058e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.563425e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.563425e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.426869 sec +INFO: No Floating Point Exceptions have been reported + 1,223,516,570 cycles # 2.834 GHz + 2,473,072,662 instructions # 2.02 insn per cycle + 0.432489185 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1870) (512y: 1) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955698961392 +Relative difference = 2.9116235141448046e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe is not supported 
(no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.875374e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.829234e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.829234e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 +TOTAL : 0.455920 sec +INFO: No Floating Point Exceptions have been reported + 1,079,442,551 cycles # 2.341 GHz + 2,072,975,829 instructions # 1.92 insn per cycle + 0.461745309 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1011) (512y: 5) (512z: 1292) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955262403935 +Relative difference = 3.207154680524219e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index 62b65a6b6c..12dbe0a7bb 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-18_19:27:58 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:33:46 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.521701e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.724716e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.778140e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.485983e-01 +- 3.276854e-05 ) GeV^0 -TOTAL : 0.358513 sec -INFO: No Floating Point Exceptions have been reported - 813,170,961 cycles:u # 2.136 GHz (72.50%) - 2,461,471 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.24%) - 5,856,414 
stalled-cycles-backend:u # 0.72% backend cycles idle (74.75%) - 1,420,371,307 instructions:u # 1.75 insn per cycle - # 0.00 stalled cycles per insn (75.73%) - 0.417668225 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.057555e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.155700e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.563343e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 +TOTAL : 0.485884 sec +INFO: No Floating Point Exceptions have been reported + 2,043,628,192 cycles # 2.869 GHz + 2,916,801,925 instructions # 1.43 insn per cycle + 0.771023658 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 86 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477195e-01 -Avg ME (F77/GPU) = 0.14771957969060168 -Relative difference = 5.394724574150425e-07 +Avg ME (F77/GPU) = 0.14771956525510177 +Relative difference = 4.4175008557828484e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.219672e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.397516e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.397516e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283184e-05 ) GeV^0 -TOTAL : 1.029273 sec -INFO: No Floating Point Exceptions have been reported - 2,974,795,216 cycles:u # 2.862 GHz (74.68%) - 6,707,863 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.67%) - 9,343,346 stalled-cycles-backend:u # 0.31% backend cycles idle (74.78%) - 9,323,790,456 instructions:u # 3.13 insn per cycle - # 0.00 stalled cycles per insn (75.16%) - 1.043606611 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 337) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.191416e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.056446e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.056446e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 1.229162 sec +INFO: No Floating Point Exceptions have been reported + 3,623,698,938 cycles # 2.936 GHz + 9,471,242,034 instructions # 2.61 insn per cycle + 1.234707648 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 367) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.126940e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.665561e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.665561e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283183e-05 ) GeV^0 -TOTAL : 0.503144 sec -INFO: No Floating Point Exceptions have been reported - 1,374,855,013 cycles:u # 2.680 GHz (74.86%) - 7,264,560 stalled-cycles-frontend:u # 0.53% frontend cycles idle (75.06%) - 8,850,995 stalled-cycles-backend:u # 0.64% backend cycles idle (75.06%) - 3,796,152,939 instructions:u # 2.76 insn per cycle - # 0.00 stalled cycles per insn (75.06%) - 0.517448809 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1479) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.264406e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.455240e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.455240e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 0.551418 sec +INFO: No Floating Point Exceptions have been reported + 1,633,608,321 cycles # 2.938 GHz + 3,933,410,721 instructions # 2.41 insn per cycle + 0.556738925 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1517) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955448668450 -Relative difference = 3.081061382869002e-07 +Avg ME (F77/C++) = 0.14771955861942843 +Relative difference = 2.80129187869649e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.106438e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.074111e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.074111e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283177e-05 ) GeV^0 -TOTAL : 0.425530 sec -INFO: No Floating Point Exceptions have been reported - 1,103,180,205 cycles:u # 2.533 GHz (74.31%) - 6,001,306 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.45%) - 37,751,830 stalled-cycles-backend:u # 3.42% backend cycles idle (74.46%) - 2,427,372,520 instructions:u # 2.20 insn per cycle - # 0.02 stalled cycles per insn (74.66%) - 0.439801504 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1802) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.014495e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.293948e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.293948e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.435710 sec +INFO: No Floating Point Exceptions have been reported + 1,251,845,572 cycles # 2.841 GHz + 2,481,653,408 instructions # 1.98 insn per cycle + 0.441241697 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1817) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955128526315 -Relative difference = 3.2977842382139064e-07 +Avg ME (F77/C++) = 0.14771955698961392 +Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal 
loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.125464e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.603160e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.603160e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.424252 sec +INFO: No Floating Point Exceptions have been reported + 1,222,912,229 cycles # 2.849 GHz + 2,456,305,937 instructions # 2.01 insn per cycle + 0.429917564 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1773) (512y: 1) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955698961392 +Relative difference = 2.9116235141448046e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe is not supported 
(no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.934438e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.010088e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.010088e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 +TOTAL : 0.446349 sec +INFO: No Floating Point Exceptions have been reported + 1,072,792,214 cycles # 2.378 GHz + 2,057,138,403 instructions # 1.92 insn per cycle + 0.451920157 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 906) (512y: 5) (512z: 1273) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955262403935 +Relative difference = 3.207154680524219e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 12c3ce295c..8f7e2917bf 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-18_19:28:04 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:33:57 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.307714e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.169984e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.218366e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 -TOTAL : 0.385223 sec -INFO: No Floating Point Exceptions have been reported - 806,181,073 cycles:u # 1.952 GHz (74.18%) - 2,359,397 stalled-cycles-frontend:u # 0.29% frontend cycles idle (72.98%) - 9,657,871 
stalled-cycles-backend:u # 1.20% backend cycles idle (75.31%) - 1,435,597,807 instructions:u # 1.78 insn per cycle - # 0.01 stalled cycles per insn (77.22%) - 0.450161349 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.090014e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.319571e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.751667e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.522308 sec +INFO: No Floating Point Exceptions have been reported + 2,179,170,623 cycles # 2.882 GHz + 3,109,984,327 instructions # 1.43 insn per cycle + 0.814646692 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 Relative difference = 2.5810037581511336e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.032746e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.158591e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.158591e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 1.232523 sec -INFO: No Floating Point Exceptions have been reported - 3,470,920,803 cycles:u # 2.779 GHz (74.96%) - 8,564,896 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.02%) - 5,964,520 stalled-cycles-backend:u # 0.17% backend cycles idle (75.02%) - 9,634,206,038 instructions:u # 2.78 insn per cycle - # 0.00 stalled cycles per insn (75.08%) - 1.253147920 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 332) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.952529e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.022459e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.022459e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.284601 sec +INFO: No Floating Point Exceptions have been reported + 3,782,838,045 cycles # 2.933 GHz + 9,753,328,321 instructions # 2.58 insn per cycle + 1.290389924 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.997756e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.578768e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.578768e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.743253 sec -INFO: No Floating Point Exceptions have been reported - 1,959,017,066 cycles:u # 2.579 GHz (74.82%) - 7,999,445 stalled-cycles-frontend:u # 0.41% frontend cycles idle (74.82%) - 12,794,708 stalled-cycles-backend:u # 0.65% backend cycles idle (74.73%) - 5,899,724,502 instructions:u # 3.01 insn per cycle - # 0.00 stalled cycles per insn (74.73%) - 0.763954796 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1383) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.563360e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.027715e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027715e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.783750 sec +INFO: No Floating Point Exceptions have been reported + 2,313,452,686 cycles # 2.933 GHz + 5,920,736,181 instructions # 2.56 insn per cycle + 0.789531453 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1412) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.086028e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.556292e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.556292e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.556429 sec -INFO: No Floating Point Exceptions have been reported - 1,370,947,632 cycles:u # 2.393 GHz (75.07%) - 7,972,174 stalled-cycles-frontend:u # 0.58% frontend cycles idle (74.87%) - 14,848,905 stalled-cycles-backend:u # 1.08% backend cycles idle (74.99%) - 3,233,666,236 instructions:u # 2.36 insn per cycle - # 0.00 stalled cycles per insn (74.99%) - 0.577262695 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1546) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.274544e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.372577e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.372577e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.574756 sec +INFO: No Floating Point Exceptions have been reported + 1,639,105,587 cycles # 2.827 GHz + 3,253,580,218 instructions # 1.98 insn per cycle + 0.580508158 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1567) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956675526976 -Relative difference = 2.2505293980258705e-07 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal 
loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.338032e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.481810e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.481810e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.561846 sec +INFO: No Floating Point Exceptions have been reported + 1,602,124,528 cycles # 2.826 GHz + 3,209,983,521 instructions # 2.00 insn per cycle + 0.567621873 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1446) (512y: 101) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe is not supported 
(no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.198566e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.176156e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.176156e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.590094 sec +INFO: No Floating Point Exceptions have been reported + 1,347,708,343 cycles # 2.265 GHz + 2,376,834,038 instructions # 1.76 insn per cycle + 0.595752442 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 768) (512y: 64) (512z: 1063) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index c45e743959..856901d743 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-18_19:28:10 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:34:09 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.804264e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.167273e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.215068e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 -TOTAL : 0.412741 sec -INFO: No Floating Point Exceptions have been reported - 824,415,251 cycles:u # 1.980 GHz (75.53%) - 2,468,925 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.32%) - 8,503,324 
stalled-cycles-backend:u # 1.03% backend cycles idle (76.14%) - 1,419,988,872 instructions:u # 1.72 insn per cycle - # 0.01 stalled cycles per insn (76.40%) - 0.481430048 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.212511e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.510212e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.023434e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.523683 sec +INFO: No Floating Point Exceptions have been reported + 2,161,000,888 cycles # 2.849 GHz + 3,093,780,518 instructions # 1.43 insn per cycle + 0.816657446 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 Relative difference = 2.5810037581511336e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.028761e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.154688e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.154688e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 1.237143 sec -INFO: No Floating Point Exceptions have been reported - 3,501,274,647 cycles:u # 2.792 GHz (74.66%) - 8,331,258 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.93%) - 10,458,091 stalled-cycles-backend:u # 0.30% backend cycles idle (75.13%) - 9,540,303,436 instructions:u # 2.72 insn per cycle - # 0.00 stalled cycles per insn (75.13%) - 1.258419856 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 343) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.006386e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.027076e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.027076e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.275858 sec +INFO: No Floating Point Exceptions have been reported + 3,759,691,883 cycles # 2.936 GHz + 9,643,680,583 instructions # 2.57 insn per cycle + 1.281474685 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.005056e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.585052e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.585052e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.740844 sec -INFO: No Floating Point Exceptions have been reported - 1,964,814,058 cycles:u # 2.594 GHz (74.69%) - 8,089,559 stalled-cycles-frontend:u # 0.41% frontend cycles idle (74.69%) - 13,788,218 stalled-cycles-backend:u # 0.70% backend cycles idle (74.72%) - 5,865,914,908 instructions:u # 2.99 insn per cycle - # 0.00 stalled cycles per insn (74.82%) - 0.761884120 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1353) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.517196e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.947819e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.947819e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.804148 sec +INFO: No Floating Point Exceptions have been reported + 2,322,905,849 cycles # 2.871 GHz + 5,850,527,655 instructions # 2.52 insn per cycle + 0.809789330 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1371) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.078459e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.540950e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.540950e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.558827 sec -INFO: No Floating Point Exceptions have been reported - 1,380,738,015 cycles:u # 2.399 GHz (75.02%) - 8,097,320 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.99%) - 23,334,783 stalled-cycles-backend:u # 1.69% backend cycles idle (74.99%) - 3,206,215,655 instructions:u # 2.32 insn per cycle - # 0.01 stalled cycles per insn (75.11%) - 0.580226895 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1487) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.254780e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.333242e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.333242e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.577840 sec +INFO: No Floating Point Exceptions have been reported + 1,650,198,876 cycles # 2.831 GHz + 3,216,570,367 instructions # 1.95 insn per cycle + 0.583563842 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1483) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956675526976 -Relative difference = 2.2505293980258705e-07 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal 
loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.314025e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.454653e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.454653e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.565918 sec +INFO: No Floating Point Exceptions have been reported + 1,600,538,363 cycles # 2.803 GHz + 3,181,550,003 instructions # 1.99 insn per cycle + 0.571587963 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1382) (512y: 101) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe is not supported 
(no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.185908e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.142994e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.142994e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.592175 sec +INFO: No Floating Point Exceptions have been reported + 1,356,716,498 cycles # 2.272 GHz + 2,361,264,569 instructions # 1.74 insn per cycle + 0.597815792 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 716) (512y: 64) (512z: 1056) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 03f48ac4eb..99516e3f65 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_19:26:18 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:30:47 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.972005e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.492614e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.513911e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 -TOTAL : 0.456275 sec -INFO: No Floating Point Exceptions have been reported - 948,437,393 cycles:u # 2.054 GHz (73.91%) - 2,505,859 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.62%) - 7,088,010 stalled-cycles-backend:u # 0.75% backend cycles idle (76.35%) - 1,517,377,285 instructions:u # 1.60 insn per cycle - # 0.00 stalled cycles per insn (76.19%) - 0.650445358 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.206537e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.286021e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.966943e+07 ) sec^-1 +MeanMatrixElemValue = ( 
2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.539704 sec +INFO: No Floating Point Exceptions have been reported + 2,208,534,555 cycles # 2.845 GHz + 3,150,536,398 instructions # 1.43 insn per cycle + 0.835623159 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 -Avg ME (F77/GPU) = 2.0158358666195553 -Relative difference = 6.616631755314852e-08 +Avg ME (F77/GPU) = 2.0158358666195562 +Relative difference = 6.616631711254798e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.218247e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.268669e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.268669e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 4.937872 sec -INFO: No Floating Point Exceptions have been reported - 14,977,288,401 cycles:u # 3.026 GHz (74.97%) - 10,353,628 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.93%) - 3,049,842,670 stalled-cycles-backend:u # 20.36% backend cycles idle (74.96%) - 45,597,437,018 instructions:u # 3.04 insn per cycle - # 0.07 stalled cycles per insn (74.95%) - 5.024039428 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 663) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.822612e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.869779e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.869779e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.861835 sec +INFO: No Floating Point Exceptions have been reported + 17,248,615,219 cycles # 2.940 GHz + 45,920,744,006 instructions # 2.66 insn per cycle + 5.867505238 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194411 -Relative difference = 6.616637417031725e-08 +Avg ME (F77/C++) = 2.0158358666194407 +Relative difference = 6.616637439061751e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.845460e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.016940e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.016940e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.935896 sec -INFO: No Floating Point Exceptions have been reported - 8,794,033,219 cycles:u # 2.985 GHz (75.05%) - 8,968,587 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.02%) - 2,713,571,872 stalled-cycles-backend:u # 30.86% backend cycles idle (75.03%) - 27,707,029,536 instructions:u # 3.15 insn per cycle - # 0.10 stalled cycles per insn (75.04%) - 3.044904707 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2458) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.157644e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.314617e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.314617e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.430104 sec +INFO: No Floating Point Exceptions have been reported + 10,035,725,674 cycles # 2.922 GHz + 27,802,903,324 instructions # 2.77 insn per cycle + 3.435933108 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2537) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.459220e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.926351e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.926351e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.835055 sec -INFO: No Floating Point Exceptions have been reported - 5,333,169,296 cycles:u # 2.893 GHz (74.99%) - 8,715,004 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.89%) - 560,049,655 stalled-cycles-backend:u # 10.50% backend cycles idle (74.90%) - 12,436,304,398 instructions:u # 2.33 insn per cycle - # 0.05 stalled cycles per insn (74.84%) - 1.955858148 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2492) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.941289e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.318652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.318652e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.231098 sec +INFO: No Floating Point Exceptions have been reported + 6,101,804,369 cycles # 2.729 GHz + 12,586,990,350 instructions # 2.06 insn per cycle + 2.237005738 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2620) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.519324e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 5.987161e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.987161e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.007245 sec +INFO: No Floating Point Exceptions have been reported + 5,563,695,868 cycles # 2.765 GHz + 12,000,166,171 instructions # 2.16 insn per cycle + 2.013040788 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2365) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.502694e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.684435e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.684435e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.101349 sec +INFO: No Floating Point Exceptions have been reported + 5,749,698,258 cycles # 1.851 GHz + 8,343,640,860 instructions # 1.45 insn per cycle + 3.107135736 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1468) (512y: 122) (512z: 1806) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index 5c1904fff2..1f4bfaf624 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_19:26:32 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:31:12 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.962969e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.474341e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.495413e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 -TOTAL : 0.463107 sec -INFO: No Floating Point Exceptions have been reported - 940,768,689 cycles:u # 2.050 GHz (76.06%) - 2,496,325 stalled-cycles-frontend:u # 0.27% frontend cycles idle (76.11%) - 5,742,181 stalled-cycles-backend:u # 0.61% backend cycles idle (74.90%) - 1,618,691,327 instructions:u # 1.72 insn per cycle - # 0.00 stalled cycles per insn (73.75%) - 0.637262581 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.340722e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.356922e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.992900e+07 ) sec^-1 +MeanMatrixElemValue = ( 
2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.532286 sec +INFO: No Floating Point Exceptions have been reported + 2,205,060,845 cycles # 2.868 GHz + 3,167,717,935 instructions # 1.44 insn per cycle + 0.825884785 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 -Avg ME (F77/GPU) = 2.0158358666195553 -Relative difference = 6.616631755314852e-08 +Avg ME (F77/GPU) = 2.0158358666195562 +Relative difference = 6.616631711254798e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.367031e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.426766e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.426766e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 4.635931 sec -INFO: No Floating Point Exceptions have been reported - 14,052,570,920 cycles:u # 3.021 GHz (75.02%) - 8,949,168 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) - 2,267,268,134 stalled-cycles-backend:u # 16.13% backend cycles idle (74.90%) - 44,497,614,973 instructions:u # 3.17 insn per cycle - # 0.05 stalled cycles per insn (74.95%) - 4.735362364 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 592) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.873402e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.922894e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.922894e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.705020 sec +INFO: No Floating Point Exceptions have been reported + 16,751,892,515 cycles # 2.934 GHz + 44,906,929,991 instructions # 2.68 insn per cycle + 5.710885629 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 566) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.086943e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.280498e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.280498e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.789988 sec -INFO: No Floating Point Exceptions have been reported - 8,296,693,720 cycles:u # 2.976 GHz (75.04%) - 9,485,272 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.04%) - 1,459,624,763 stalled-cycles-backend:u # 17.59% backend cycles idle (75.05%) - 26,682,455,448 instructions:u # 3.22 insn per cycle - # 0.05 stalled cycles per insn (75.07%) - 2.873886609 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2278) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.361567e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.536177e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.536177e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.227440 sec +INFO: No Floating Point Exceptions have been reported + 9,512,762,540 cycles # 2.943 GHz + 26,678,539,109 instructions # 2.80 insn per cycle + 3.233163450 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2326) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.808830e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.184155e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.184155e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.011572 sec -INFO: No Floating Point Exceptions have been reported - 5,961,619,484 cycles:u # 2.942 GHz (74.77%) - 9,990,396 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.94%) - 1,230,778,826 stalled-cycles-backend:u # 20.65% backend cycles idle (75.14%) - 14,121,190,623 instructions:u # 2.37 insn per cycle - # 0.09 stalled cycles per insn (75.14%) - 2.114162326 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2700) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.604596e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.927835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.927835e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.385363 sec +INFO: No Floating Point Exceptions have been reported + 6,599,025,301 cycles # 2.760 GHz + 14,108,971,201 instructions # 2.14 insn per cycle + 2.391489598 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2705) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.791684e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 5.138771e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.138771e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.294784 sec +INFO: No Floating Point Exceptions have been reported + 6,350,789,081 cycles # 2.762 GHz + 13,712,967,214 instructions # 2.16 insn per cycle + 2.300513281 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2356) (512y: 298) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.371675e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.540530e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.540530e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.217988 sec +INFO: No Floating Point Exceptions have been reported + 5,939,821,646 cycles # 1.843 GHz + 10,101,817,070 instructions # 1.70 insn per cycle + 3.223668588 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1318) (512y: 208) (512z: 1986) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index f7b829d7ca..1a672b74ce 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_19:26:47 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:31:37 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.980538e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.098565e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.120913e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.064391e+00 +- 3.343192e-03 ) GeV^0 -TOTAL : 0.379471 sec -INFO: No Floating Point Exceptions have been reported - 829,738,360 cycles:u # 2.119 GHz (74.46%) - 2,361,328 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.50%) - 12,584,293 stalled-cycles-backend:u # 1.52% backend cycles idle (75.50%) - 1,463,399,939 instructions:u # 1.76 insn per cycle - # 0.01 stalled cycles per insn (76.48%) - 0.572907708 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.264093e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.766977e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.882650e+08 ) sec^-1 +MeanMatrixElemValue = ( 
2.072877e+00 +- 3.361153e-03 ) GeV^0 +TOTAL : 0.489615 sec +INFO: No Floating Point Exceptions have been reported + 2,060,695,462 cycles # 2.874 GHz + 2,961,708,283 instructions # 1.44 insn per cycle + 0.774445109 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.015844e+00 -Avg ME (F77/GPU) = 2.0158466693246737 -Relative difference = 1.3241722443517625e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015841e+00 +Avg ME (F77/GPU) = 2.0158787037944421 +Relative difference = 1.870375413642407e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.463914e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.528816e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.528816e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 -TOTAL : 4.436058 sec -INFO: No Floating Point Exceptions have been reported - 13,514,597,589 cycles:u # 3.049 GHz (74.84%) - 8,749,737 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.84%) - 2,777,186,527 stalled-cycles-backend:u # 20.55% backend cycles idle (75.00%) - 45,506,266,055 instructions:u # 3.37 insn per cycle - # 0.06 stalled cycles per insn (75.09%) - 4.529851721 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 667) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.937524e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.992418e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.992418e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 +TOTAL : 5.498876 sec +INFO: No Floating Point Exceptions have been reported + 16,211,815,789 cycles # 2.946 GHz + 45,319,917,505 instructions # 2.80 insn per cycle + 5.504546294 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158491450129077 -Relative difference = 7.193639399772436e-08 +Avg ME (F77/C++) = 2.0158491701586172 +Relative difference = 8.441039850630506e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.562972e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.910480e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.910480e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 -TOTAL : 2.067754 sec -INFO: No Floating Point Exceptions have been reported - 6,153,571,441 cycles:u # 2.994 GHz (74.92%) - 6,628,536 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.80%) - 2,561,584,960 stalled-cycles-backend:u # 41.63% backend cycles idle (74.91%) - 17,076,572,379 instructions:u # 2.78 insn per cycle - # 0.15 stalled cycles per insn (75.10%) - 2.144863506 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2902) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.533229e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.869354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.869354e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 +TOTAL : 2.401545 sec +INFO: No Floating Point Exceptions have been reported + 7,056,760,375 cycles # 2.932 GHz + 17,791,878,594 instructions # 2.52 insn per cycle + 2.407391534 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3147) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158492142800242 -Relative difference = 1.0629765641719438e-07 +Avg ME (F77/C++) = 2.0158486895961687 +Relative difference = 1.539816876576819e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.054455e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.180793e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.180793e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.065802e+00 +- 3.352030e-03 ) GeV^0 -TOTAL : 1.177516 sec -INFO: No Floating Point Exceptions have been reported - 3,381,876,587 cycles:u # 2.889 GHz (74.79%) - 7,165,420 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.76%) - 828,613,511 stalled-cycles-backend:u # 24.50% backend cycles idle (75.05%) - 8,048,609,709 instructions:u # 2.38 insn per cycle - # 0.10 stalled cycles per insn (75.39%) - 1.295862118 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3258) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.087610e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.152694e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.152694e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.383748 sec +INFO: No Floating Point Exceptions have been reported + 3,839,977,803 cycles # 2.765 GHz + 8,262,037,377 instructions # 2.15 insn per cycle + 1.389311013 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3371) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015848e+00 -Avg ME (F77/C++) = 2.0158479403471574 -Relative difference = 2.9591934841076347e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015847e+00 +Avg ME (F77/C++) = 2.0158474864438176 +Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.847495e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.011837e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.011837e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.271772 sec +INFO: No Floating Point Exceptions have been reported + 3,548,498,858 cycles # 2.779 GHz + 7,914,474,526 instructions # 2.23 insn per cycle + 1.277559305 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3214) (512y: 20) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015847e+00 +Avg ME (F77/C++) = 2.0158474864438176 +Relative difference = 2.4130988992271984e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.536546e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.195032e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.195032e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.690671 sec +INFO: No Floating Point Exceptions have been reported + 3,256,995,213 cycles # 1.921 GHz + 6,100,882,884 instructions # 1.87 insn per cycle + 1.696260075 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2258) (512y: 22) (512z: 2156) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015848e+00 +Avg ME (F77/C++) = 2.0158476348733529 +Relative difference = 1.8112806478434436e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index 87536c8e20..d3b2f0408f 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_19:26:59 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:31:58 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.115283e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.129315e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.151880e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.064391e+00 +- 3.343192e-03 ) GeV^0 -TOTAL : 0.386185 sec -INFO: No Floating Point Exceptions have been reported - 803,193,561 cycles:u # 2.058 GHz (75.44%) - 2,329,368 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.20%) - 12,768,156 stalled-cycles-backend:u # 1.59% backend cycles idle (75.32%) - 1,467,124,445 instructions:u # 1.83 insn per cycle - # 0.01 stalled cycles per insn (75.46%) - 0.605071110 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.208288e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.783701e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.898530e+08 ) sec^-1 +MeanMatrixElemValue = ( 
2.072877e+00 +- 3.361153e-03 ) GeV^0 +TOTAL : 0.487345 sec +INFO: No Floating Point Exceptions have been reported + 2,044,938,895 cycles # 2.858 GHz + 2,894,501,323 instructions # 1.42 insn per cycle + 0.773252899 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.015844e+00 -Avg ME (F77/GPU) = 2.0158466693246737 -Relative difference = 1.3241722443517625e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015841e+00 +Avg ME (F77/GPU) = 2.0158787037944421 +Relative difference = 1.870375413642407e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.684271e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.761449e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.761449e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 -TOTAL : 4.072075 sec -INFO: No Floating Point Exceptions have been reported - 12,446,264,441 cycles:u # 3.050 GHz (74.92%) - 7,172,343 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.93%) - 1,595,583,580 stalled-cycles-backend:u # 12.82% backend cycles idle (74.93%) - 44,294,289,533 instructions:u # 3.56 insn per cycle - # 0.04 stalled cycles per insn (75.01%) - 4.105681463 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 571) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.963294e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.019360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019360e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 +TOTAL : 5.426510 sec +INFO: No Floating Point Exceptions have been reported + 15,955,926,327 cycles # 2.938 GHz + 44,427,771,107 instructions # 2.78 insn per cycle + 5.431874949 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 533) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158491450129077 -Relative difference = 7.193639399772436e-08 +Avg ME (F77/C++) = 2.0158491701586172 +Relative difference = 8.441039850630506e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.085417e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.509200e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.509200e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 -TOTAL : 1.889702 sec -INFO: No Floating Point Exceptions have been reported - 5,642,254,450 cycles:u # 2.971 GHz (74.99%) - 6,365,297 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.15%) - 1,763,423,250 stalled-cycles-backend:u # 31.25% backend cycles idle (75.15%) - 16,907,851,890 instructions:u # 3.00 insn per cycle - # 0.10 stalled cycles per insn (75.15%) - 1.964939519 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2752) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.335493e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.807156e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.807156e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 +TOTAL : 2.051564 sec +INFO: No Floating Point Exceptions have been reported + 6,058,187,563 cycles # 2.946 GHz + 17,074,725,200 instructions # 2.82 insn per cycle + 2.057140058 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2862) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158492142800242 -Relative difference = 1.0629765641719438e-07 +Avg ME (F77/C++) = 2.0158486895961687 +Relative difference = 1.539816876576819e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.869022e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.552438e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.552438e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065802e+00 +- 3.352030e-03 ) GeV^0 -TOTAL : 1.498616 sec -INFO: No Floating Point Exceptions have been reported - 4,448,501,705 cycles:u # 2.949 GHz (74.88%) - 7,315,338 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.84%) - 1,680,475,755 stalled-cycles-backend:u # 37.78% backend cycles idle (74.98%) - 10,195,091,217 instructions:u # 2.29 insn per cycle - # 0.16 stalled cycles per insn (75.08%) - 1.541118768 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3884) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.066914e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.644109e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.644109e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.813330 sec +INFO: No Floating Point Exceptions have been reported + 5,026,891,048 cycles # 2.765 GHz + 10,223,175,449 instructions # 2.03 insn per cycle + 1.818918027 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3906) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015848e+00 -Avg ME (F77/C++) = 2.0158479403471574 -Relative difference = 2.9591934841076347e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015847e+00 +Avg ME (F77/C++) = 2.0158474864438176 +Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.155601e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.742490e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.742490e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.789620 sec +INFO: No Floating Point Exceptions have been reported + 4,970,225,584 cycles # 2.770 GHz + 9,994,978,881 instructions # 2.01 insn per cycle + 1.795236203 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3805) (512y: 2) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015847e+00 +Avg ME (F77/C++) = 2.0158474864438176 +Relative difference = 2.4130988992271984e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.666448e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.992729e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.992729e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 2.333237 sec +INFO: No Floating Point Exceptions have been reported + 4,367,486,322 cycles # 1.868 GHz + 8,444,271,998 instructions # 1.93 insn per cycle + 2.338821094 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2744) (512y: 4) (512z: 2754) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015848e+00 +Avg ME (F77/C++) = 2.0158476348733529 +Relative difference = 1.8112806478434436e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 39a1b0d89f..c1f4bb8132 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_19:27:11 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:32:20 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.934496e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.433672e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.453957e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 -TOTAL : 0.423359 sec -INFO: No Floating Point Exceptions have been reported - 984,376,123 cycles:u # 2.183 GHz (74.98%) - 2,542,046 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.21%) - 5,049,320 stalled-cycles-backend:u # 0.51% backend cycles idle (75.63%) - 1,505,653,920 instructions:u # 1.53 insn per cycle - # 0.00 stalled cycles per insn (75.61%) - 0.485761756 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.373966e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.408476e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005223e+08 ) sec^-1 +MeanMatrixElemValue = ( 
2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.533114 sec +INFO: No Floating Point Exceptions have been reported + 2,212,396,057 cycles # 2.876 GHz + 3,189,695,931 instructions # 1.44 insn per cycle + 0.826417249 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358639104246 Relative difference = 6.751024171044779e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK 
+Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.275227e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.330911e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.330911e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 4.810598 sec -INFO: No Floating Point Exceptions have been reported - 14,609,690,701 cycles:u # 3.028 GHz (74.90%) - 8,816,517 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.90%) - 2,840,211,842 stalled-cycles-backend:u # 19.44% backend cycles idle (74.97%) - 45,713,684,667 instructions:u # 3.13 insn per cycle - # 0.06 stalled cycles per insn (75.03%) - 4.829850527 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 673) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.812942e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.859362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.859362e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.891473 sec +INFO: No Floating Point Exceptions have been reported + 17,373,992,128 cycles # 2.947 GHz + 46,072,043,013 instructions # 2.65 insn per cycle + 5.897196721 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.828976e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.997993e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.997993e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.944861 sec -INFO: No Floating Point Exceptions have been reported - 8,818,778,594 cycles:u # 2.980 GHz (74.92%) - 9,374,426 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.86%) - 2,768,129,590 stalled-cycles-backend:u # 31.39% backend cycles idle (74.86%) - 27,575,612,310 instructions:u # 3.13 insn per cycle - # 0.10 stalled cycles per insn (74.98%) - 2.963974282 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2518) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.226094e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.386425e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.386425e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.358561 sec +INFO: No Floating Point Exceptions have been reported + 9,911,091,884 cycles # 2.947 GHz + 27,587,758,232 instructions # 2.78 insn per cycle + 3.364358964 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.059239e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.581260e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.581260e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.947799 sec -INFO: No Floating Point Exceptions have been reported - 5,716,686,193 cycles:u # 2.913 GHz (74.74%) - 8,513,700 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.90%) - 1,359,585,869 stalled-cycles-backend:u # 23.78% backend cycles idle (75.10%) - 12,236,791,945 instructions:u # 2.14 insn per cycle - # 0.11 stalled cycles per insn (75.14%) - 1.966967622 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2671) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.044961e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.439076e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.439076e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.186755 sec +INFO: No Floating Point Exceptions have been reported + 6,022,763,481 cycles # 2.748 GHz + 12,488,130,017 instructions # 2.07 insn per cycle + 2.192467039 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359151896224 -Relative difference = 4.20720623263505e-08 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.596506e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.079685e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.079685e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 1.980331 sec +INFO: No Floating Point Exceptions have been reported + 5,504,974,873 cycles # 2.773 GHz + 11,923,154,801 instructions # 2.17 insn per cycle + 1.986372291 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2521) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.610025e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.802161e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.802161e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.013333 sec +INFO: No Floating Point Exceptions have been reported + 5,617,715,088 cycles # 1.861 GHz + 8,110,898,143 instructions # 1.44 insn per cycle + 3.019371634 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1865) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index ede8c1d2c7..744bfec9d4 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-18_19:27:25 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan01 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-09-18_13:32:45 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [clang 17.0.0] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.000593e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.522978e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.544792e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 -TOTAL : 0.427450 sec -INFO: No Floating Point Exceptions have been reported - 965,106,560 cycles:u # 2.118 GHz (75.70%) - 2,488,206 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.42%) - 10,996,365 stalled-cycles-backend:u # 1.14% backend cycles idle (75.48%) - 1,577,569,884 instructions:u # 1.63 insn per cycle - # 0.01 stalled cycles per insn (74.55%) - 0.489792643 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.356227e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.388949e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002637e+08 ) sec^-1 +MeanMatrixElemValue = ( 
2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.530171 sec +INFO: No Floating Point Exceptions have been reported + 2,205,062,942 cycles # 2.875 GHz + 3,154,626,469 instructions # 1.43 insn per cycle + 0.823696592 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358639104246 Relative difference = 6.751024171044779e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK 
+Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.338951e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.397942e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.397942e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 4.686230 sec -INFO: No Floating Point Exceptions have been reported - 14,307,115,528 cycles:u # 3.044 GHz (74.99%) - 8,712,906 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.99%) - 780,057,400 stalled-cycles-backend:u # 5.45% backend cycles idle (75.00%) - 44,583,603,957 instructions:u # 3.12 insn per cycle - # 0.02 stalled cycles per insn (74.99%) - 4.705408961 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.861428e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.909561e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.909561e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.739728 sec +INFO: No Floating Point Exceptions have been reported + 16,938,834,117 cycles # 2.949 GHz + 45,091,140,717 instructions # 2.66 insn per cycle + 5.745446347 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.124854e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.322609e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.322609e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.748390 sec -INFO: No Floating Point Exceptions have been reported - 8,264,613,718 cycles:u # 2.991 GHz (74.84%) - 9,568,431 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.83%) - 1,106,363,678 stalled-cycles-backend:u # 13.39% backend cycles idle (74.98%) - 26,285,216,045 instructions:u # 3.18 insn per cycle - # 0.04 stalled cycles per insn (75.10%) - 2.767313701 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2311) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.325491e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.496074e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.496074e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.260792 sec +INFO: No Floating Point Exceptions have been reported + 9,505,160,256 cycles # 2.910 GHz + 26,249,919,899 instructions # 2.76 insn per cycle + 3.266614954 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.733806e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.099178e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.099178e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.034839 sec -INFO: No Floating Point Exceptions have been reported - 5,999,141,474 cycles:u # 2.927 GHz (75.02%) - 8,863,145 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.02%) - 1,779,427,123 stalled-cycles-backend:u # 29.66% backend cycles idle (75.02%) - 13,978,554,557 instructions:u # 2.33 insn per cycle - # 0.13 stalled cycles per insn (75.06%) - 2.053844830 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2870) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.459875e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.763541e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.763541e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.458814 sec +INFO: No Floating Point Exceptions have been reported + 6,750,977,111 cycles # 2.740 GHz + 14,029,286,718 instructions # 2.08 insn per cycle + 2.464538527 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359151896224 -Relative difference = 4.20720623263505e-08 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.781257e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.129375e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.129375e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.300139 sec +INFO: No Floating Point Exceptions have been reported + 6,382,631,497 cycles # 2.769 GHz + 13,515,067,929 instructions # 2.12 insn per cycle + 2.305941749 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2531) (512y: 302) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.602901e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.797238e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.797238e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.017121 sec +INFO: No Floating Point Exceptions have been reported + 5,589,518,345 cycles # 1.850 GHz + 9,206,594,679 instructions # 1.65 insn per cycle + 3.022936699 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2059) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED From 6eea88941eb14a024bdb5537e0399802470f8bef Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 23 Sep 2024 12:01:50 +0200 Subject: [PATCH 64/76] removed superfluous makefile, added default backend that prioritises gpu backends over cpp using the original error logic for CUDA or HIP --- .../template_files/gpu/cudacpp_config.mk | 15 +- .../template_files/gpu/cudacpp_rex_driver.mk | 1048 ---------------- .../template_files/gpu/cudacpp_rex_runner.mk | 1051 ----------------- 3 files changed, 14 insertions(+), 2100 deletions(-) delete mode 100644 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk delete mode 100644 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk index 438fcd1661..763f12e830 100644 --- 
a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk @@ -10,7 +10,20 @@ # Set the default BACKEND (CUDA, HIP or C++/SIMD) choice ifeq ($(BACKEND),) - override BACKEND = cppauto + override BACKEND = gpucpp +endif + +# Stop immediately if BACKEND=cuda but nvcc is missing +ifeq ($(BACKEND),gpucpp) + ifeq ($(shell which nvcc 2>/dev/null),) + ifeq ($(shell which hipcc 2>/dev/null),) + override BACKEND = cppauto + else + override BACKEND = hip + endif + else + override BACKEND = cuda + endif endif # Set the default FPTYPE (floating point type) choice diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk deleted file mode 100644 index 9889da9575..0000000000 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_driver.mk +++ /dev/null @@ -1,1048 +0,0 @@ -# Copyright (C) 2020-2023 CERN and UCLouvain. -# Licensed under the GNU Lesser General Public License (version 3 or later). -# Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. -# Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. - -#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts -#=== NB: use 'override' to ensure that the value can not be modified from the outside -override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') - -#=== NB: different names (e.g. 
cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories -override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk - -#------------------------------------------------------------------------------- - -#=== Use bash in the Makefile (https://www.gnu.org/software/make/manual/html_node/Choosing-the-Shell.html) - -SHELL := /bin/bash - -#------------------------------------------------------------------------------- - -#=== Detect O/S and architecture (assuming uname is available, https://en.wikipedia.org/wiki/Uname) - -# Detect O/S kernel (Linux, Darwin...) -UNAME_S := $(shell uname -s) -###$(info UNAME_S='$(UNAME_S)') - -# Detect architecture (x86_64, ppc64le...) -UNAME_P := $(shell uname -p) -###$(info UNAME_P='$(UNAME_P)') - -#------------------------------------------------------------------------------- - -#=== Include the common MG5aMC Makefile options - -# OM: this is crucial for MG5aMC flag consistency/documentation -# AV: temporarely comment this out because it breaks cudacpp builds -ifneq ($(wildcard ../Source/make_opts),) -include ../Source/make_opts -endif - -#------------------------------------------------------------------------------- - -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. 
-OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) -override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) -override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else -override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../test -TESTDIRLOCAL = ../test -ifneq ($(wildcard $(GTEST_ROOT)),) -TESTDIR = -else ifneq ($(LOCALGTEST),) -TESTDIR=$(TESTDIRLOCAL) -GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../epochX/cudacpp/CODEGEN),) -TESTDIR = $(TESTDIRCOMMON) -GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else -TESTDIR = -endif -ifneq ($(GTEST_ROOT),) -GTESTLIBDIR = $(GTEST_ROOT)/lib64/ -GTESTLIBS = $(GTESTLIBDIR)/libgtest.a $(GTESTLIBDIR)/libgtest_main.a -GTESTINC = -I$(GTEST_ROOT)/include -else -GTESTLIBDIR = -GTESTLIBS = -GTESTINC 
= -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - -#=== Configure the C++ compiler - -CXXFLAGS = $(OPTFLAGS) -std=c++17 $(INCFLAGS) -Wall -Wshadow -Wextra -ifeq ($(shell $(CXX) --version | grep ^nvc++),) -CXXFLAGS += -ffast-math # see issue #117 -endif -###CXXFLAGS+= -Ofast # performance is not different from --fast-math -###CXXFLAGS+= -g # FOR DEBUGGING ONLY - -# Optionally add debug flags to display the full list of flags (eg on Darwin) -###CXXFLAGS+= -v - -# Note: AR, CXX and FC are implicitly defined if not set externally -# See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html - -# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" -ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -CXXFLAGS += -mmacosx-version-min=11.3 -endif - -#------------------------------------------------------------------------------- - -#=== Configure the GPU compiler (CUDA or HIP) - -# FIXME! (AV 24.01.2024) -# In the current implementation (without separate builds for C++ and CUDA/HIP), we first check for cudacc and hipcc in CUDA_HOME and HIP_HOME. -# If CUDA_HOME or HIP_HOME are not set, try to determine them from the path to cudacc and hipcc. -# While convoluted, this is currently necessary to allow disabling CUDA/HIP builds by setting CUDA_HOME or HIP_HOME to invalid paths. -# This will (probably?) be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775). 
- -# If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA builds (issue #505) -# This is because it is impossible to pass this to "GPUFLAGS += -ccbin " below -ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache " from outside - $(warning CUDA builds are not supported for multi-word CXX "$(CXX)") - override CUDA_HOME=disabled -endif - -# If CUDA_HOME is not set, try to set it from the path to nvcc -ifndef CUDA_HOME - CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null)) - $(warning CUDA_HOME was not set: using "$(CUDA_HOME)") -endif - -# If HIP_HOME is not set, try to set it from the path to hipcc -ifndef HIP_HOME - HIP_HOME = $(patsubst %%/bin/hipcc,%%,$(shell which hipcc 2>/dev/null)) - $(warning HIP_HOME was not set: using "$(HIP_HOME)") -endif - -# FIXME! (AV 24.01.2024) -# In the current implementation (without separate builds for C++ and CUDA/HIP), -# builds are performed for HIP only if CUDA is not found in the path. -# If both CUDA and HIP are installed, HIP builds can be triggered by unsetting CUDA_HOME. -# This will be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775). - -#--- Option 1: CUDA exists -> use CUDA - -# Set GPUCC as $(CUDA_HOME)/bin/nvcc if it exists -ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) - - GPUCC = $(CUDA_HOME)/bin/nvcc - USE_NVTX ?=-DUSE_NVTX - # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html - # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # Embed device code for 70, and PTX for 70+. - # Export MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to use another value or list of values (see #533). - # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). 
- MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###CUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###CUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 - comma:=, - CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) - CUINC = -I$(CUDA_HOME)/include/ - CUOPTFLAGS = -lineinfo - ###GPUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math - GPUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math - ###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow - ###GPUCC_VERSION = $(shell $(GPUCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1) - GPUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h - # Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12) - ###GPUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12) - ###GPUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12) - ###GPUFLAGS+= --maxrregcount 96 # degrades throughput: 4.1E8 (16384 32 12) up to 4.5E8 (65536 128 12) - ###GPUFLAGS+= --maxrregcount 64 # degrades throughput: 1.7E8 (16384 32 12) flat at 1.7E8 (65536 128 12) - CUBUILDRULEFLAGS = -Xcompiler -fPIC -c - CCBUILDRULEFLAGS = -Xcompiler -fPIC -c -x cu - CUDATESTFLAGS = -lcuda - - # Set the host C++ compiler for GPUCC via 
"-ccbin " - # (NB issue #505: this must be a single word, "clang++ --gcc-toolchain..." is not supported) - GPUFLAGS += -ccbin $(shell which $(subst ccache ,,$(CXX))) - - # Allow newer (unsupported) C++ compilers with older versions of CUDA if ALLOW_UNSUPPORTED_COMPILER_IN_CUDA is set (#504) - ifneq ($(origin ALLOW_UNSUPPORTED_COMPILER_IN_CUDA),undefined) - GPUFLAGS += -allow-unsupported-compiler - endif - -else ifneq ($(origin REQUIRE_CUDA),undefined) - - # If REQUIRE_CUDA is set but no cuda is found, stop here (e.g. for CI tests on GPU #443) - $(error No cuda installation found (set CUDA_HOME or make GPUCC visible in PATH)) - -#--- Option 2: CUDA does not exist, HIP exists -> use HIP - -# Set GPUCC as $(HIP_HOME)/bin/hipcc if it exists -else ifneq ($(wildcard $(HIP_HOME)/bin/hipcc),) - - GPUCC = $(HIP_HOME)/bin/hipcc - #USE_NVTX ?=-DUSE_NVTX # should maybe find something equivalent to this in HIP? - HIPARCHFLAGS = -target x86_64-linux-gnu --offload-arch=gfx90a - HIPINC = -I$(HIP_HOME)/include/ - # Note: -DHIP_FAST_MATH is equivalent to -use_fast_math in HIP - # (but only for single precision line 208: https://rocm-developer-tools.github.io/HIP/hcc__detail_2math__functions_8h_source.html) - # Note: CUOPTFLAGS should not be used for HIP, it had been added here but was then removed (#808) - GPUFLAGS = $(OPTFLAGS) $(INCFLAGS) $(HIPINC) $(HIPARCHFLAGS) -DHIP_FAST_MATH -DHIP_PLATFORM=amd -fPIC - ###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow - GPUFLAGS += -std=c++17 - ###GPUFLAGS+= --maxrregcount 255 # (AV: is this option valid on HIP and meaningful on AMD GPUs?) - CUBUILDRULEFLAGS = -fPIC -c - CCBUILDRULEFLAGS = -fPIC -c -x hip - -else ifneq ($(origin REQUIRE_HIP),undefined) - - # If REQUIRE_HIP is set but no HIP is found, stop here (e.g. 
for CI tests on GPU #443) - $(error No hip installation found (set HIP_HOME or make GPUCC visible in PATH)) - -#--- Option 3: CUDA does not exist, HIP does not exist -> switch off both CUDA and HIP - -else - - # No cudacc and no hipcc: switch CUDA and HIP compilation off and go to common random numbers in C++ - $(warning CUDA_HOME is not set or is invalid: export CUDA_HOME to compile with cuda) - $(warning HIP_HOME is not set or is invalid: export HIP_HOME to compile with hip) - override GPUCC= - override USE_NVTX= - override CUINC= - override HIPINC= - -endif - -# Export GPUCC (so that it can also be used in cudacpp_src.mk?) -export GPUCC -export GPUFLAGS - -#------------------------------------------------------------------------------- - -#=== Configure ccache for C++ and CUDA/HIP builds - -# Enable ccache if USECCACHE=1 -ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) - override CXX:=ccache $(CXX) -endif -#ifeq ($(USECCACHE)$(shell echo $(AR) | grep ccache),1) -# override AR:=ccache $(AR) -#endif -ifneq ($(GPUCC),) - ifeq ($(USECCACHE)$(shell echo $(GPUCC) | grep ccache),1) - override GPUCC:=ccache $(GPUCC) - endif -endif - -#------------------------------------------------------------------------------- - -#=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP - -# PowerPC-specific CXX compiler flags (being reviewed) -ifeq ($(UNAME_P),ppc64le) - CXXFLAGS+= -mcpu=power9 -mtune=power9 # gains ~2-3%% both for none and sse4 - # Throughput references without the extra flags below: none=1.41-1.42E6, sse4=2.15-2.19E6 - ###CXXFLAGS+= -DNO_WARN_X86_INTRINSICS # no change - ###CXXFLAGS+= -fpeel-loops # no change - ###CXXFLAGS+= -funroll-loops # gains ~1%% for none, loses ~1%% for sse4 - ###CXXFLAGS+= -ftree-vectorize # no change - ###CXXFLAGS+= -flto # would increase to none=4.08-4.12E6, sse4=4.99-5.03E6! -else - ###CXXFLAGS+= -flto # also on Intel this would increase throughputs by a factor 2 to 4... 
- ######CXXFLAGS+= -fno-semantic-interposition # no benefit (neither alone, nor combined with -flto) -endif - -# PowerPC-specific CUDA/HIP compiler flags (to be reviewed!) -ifeq ($(UNAME_P),ppc64le) - GPUFLAGS+= -Xcompiler -mno-float128 -endif - -#------------------------------------------------------------------------------- - -#=== Configure defaults and check if user-defined choices exist for OMPFLAGS, AVX, FPTYPE, HELINL, HRDCOD - -# Set the default OMPFLAGS choice -ifneq ($(findstring hipcc,$(GPUCC)),) -override OMPFLAGS = # disable OpenMP MT when using hipcc #802 -else ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT on Intel (was ok without GPUCC but not ok with GPUCC before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) -override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else -override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms -###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) -endif - -# Set the default AVX (vectorization) choice -ifeq ($(AVX),) - ifeq ($(UNAME_P),ppc64le) - ###override AVX = none - override AVX = sse4 - else ifeq ($(UNAME_P),arm) - ###override AVX = none - override AVX = sse4 - else ifeq ($(wildcard /proc/cpuinfo),) - override AVX = none - $(warning Using AVX='$(AVX)' because host SIMD features cannot be read from /proc/cpuinfo) - else ifeq ($(shell grep -m1 -c avx512vl /proc/cpuinfo)$(shell $(CXX) --version | grep ^clang),1) - override AVX = 512y - ###$(info Using AVX='$(AVX)' as no user input exists) - else - override AVX = avx2 - ifneq ($(shell grep -m1 -c avx512vl /proc/cpuinfo),1) - $(warning Using AVX='$(AVX)' because host does not support avx512vl) - else - $(warning Using AVX='$(AVX)' because this is faster than avx512vl for clang) - endif - endif -else - ###$(info Using AVX='$(AVX)' according to user input) -endif - -# Set the default FPTYPE (floating point type) choice -ifeq ($(FPTYPE),) - override FPTYPE = d -endif - -# Set the default HELINL (inline helicities?) choice -ifeq ($(HELINL),) - override HELINL = 0 -endif - -# Set the default HRDCOD (hardcode cIPD physics parameters?) 
choice -ifeq ($(HRDCOD),) - override HRDCOD = 0 -endif - -# Export AVX, FPTYPE, HELINL, HRDCOD, OMPFLAGS so that it is not necessary to pass them to the src Makefile too -export AVX -export FPTYPE -export HELINL -export HRDCOD -export OMPFLAGS - -#------------------------------------------------------------------------------- - -#=== Configure defaults and check if user-defined choices exist for RNDGEN (legacy!), HASCURAND, HASHIPRAND - -# If the legacy RNDGEN exists, this take precedence over any HASCURAND choice (but a warning is printed out) -###$(info RNDGEN=$(RNDGEN)) -ifneq ($(RNDGEN),) - $(warning Environment variable RNDGEN is no longer supported, please use HASCURAND instead!) - ifeq ($(RNDGEN),hasCurand) - override HASCURAND = $(RNDGEN) - else ifeq ($(RNDGEN),hasNoCurand) - override HASCURAND = $(RNDGEN) - else ifneq ($(RNDGEN),hasNoCurand) - $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported - but use HASCURAND instead!) - endif -endif - -# Set the default HASCURAND (curand random number generator) choice, if no prior choice exists for HASCURAND -# (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) -ifeq ($(HASCURAND),) - ifeq ($(GPUCC),) # CPU-only build - override HASCURAND = hasNoCurand - else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build - override HASCURAND = hasCurand - else # non-Nvidia GPU build - override HASCURAND = hasNoCurand - endif -endif - -# Set the default HASHIPRAND (hiprand random number generator) choice, if no prior choice exists for HASHIPRAND -# (NB: allow HASHIPRAND=hasHiprand even if $(GPUCC) does not point to hipcc: assume HIP_HOME was defined correctly...) 
-ifeq ($(HASHIPRAND),) - ifeq ($(GPUCC),) # CPU-only build - override HASHIPRAND = hasNoHiprand - else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build - override HASHIPRAND = hasHiprand - else # non-AMD GPU build - override HASHIPRAND = hasNoHiprand - endif -endif - -# Export HASCURAND, HASHIPRAND so that it is not necessary to pass them to the src Makefile too -# (NB: these variables in cudacpp_src.mk are only used to define the build tag, they are NOT needed for RNDCXXFLAGS or RNDLIBFLAGS) -export HASCURAND -export HASHIPRAND - -#------------------------------------------------------------------------------- - -#=== Set the CUDA/HIP/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD - -# Set the build flags appropriate to OMPFLAGS -$(info OMPFLAGS=$(OMPFLAGS)) -CXXFLAGS += $(OMPFLAGS) - -# Set the build flags appropriate to each AVX choice (example: "make AVX=none") -# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro] -# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] -$(info AVX=$(AVX)) -ifeq ($(UNAME_P),ppc64le) - ifeq ($(AVX),sse4) - override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers) - else ifneq ($(AVX),none) - $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on PowerPC for the moment) - endif -else ifeq ($(UNAME_P),arm) - ifeq ($(AVX),sse4) - override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) - else ifneq ($(AVX),none) - $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on ARM for the moment) - endif -else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 - ifeq ($(AVX),none) - override AVXFLAGS = -mno-sse3 # no SIMD - else ifeq ($(AVX),sse4) - override AVXFLAGS = -mno-avx # SSE4.2 with 128 width (xmm registers) - else ifeq ($(AVX),avx2) - override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] - else ifeq 
($(AVX),512y) - override AVXFLAGS = -march=skylake -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] - else ifeq ($(AVX),512z) - override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) - else - $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) - endif -else - ifeq ($(AVX),none) - override AVXFLAGS = -march=x86-64 # no SIMD (see #588) - else ifeq ($(AVX),sse4) - override AVXFLAGS = -march=nehalem # SSE4.2 with 128 width (xmm registers) - else ifeq ($(AVX),avx2) - override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] - else ifeq ($(AVX),512y) - override AVXFLAGS = -march=skylake-avx512 -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] - else ifeq ($(AVX),512z) - override AVXFLAGS = -march=skylake-avx512 -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) - else - $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) - endif -endif -# For the moment, use AVXFLAGS everywhere: eventually, use them only in encapsulated implementations? 
-CXXFLAGS+= $(AVXFLAGS) - -# Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") -$(info FPTYPE=$(FPTYPE)) -ifeq ($(FPTYPE),d) - CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE - GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE -else ifeq ($(FPTYPE),f) - CXXFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT - GPUFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT -else ifeq ($(FPTYPE),m) - CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT - GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT -else - $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported) -endif - -# Set the build flags appropriate to each HELINL choice (example: "make HELINL=1") -$(info HELINL=$(HELINL)) -ifeq ($(HELINL),1) - CXXFLAGS += -DMGONGPU_INLINE_HELAMPS - GPUFLAGS += -DMGONGPU_INLINE_HELAMPS -else ifneq ($(HELINL),0) - $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported) -endif - -# Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1") -$(info HRDCOD=$(HRDCOD)) -ifeq ($(HRDCOD),1) - CXXFLAGS += -DMGONGPU_HARDCODE_PARAM - GPUFLAGS += -DMGONGPU_HARDCODE_PARAM -else ifneq ($(HRDCOD),0) - $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) -endif - - -#=== Set the CUDA/HIP/C++ compiler and linker flags appropriate to user-defined choices of HASCURAND, HASHIPRAND - -$(info HASCURAND=$(HASCURAND)) -$(info HASHIPRAND=$(HASHIPRAND)) -override RNDCXXFLAGS= -override RNDLIBFLAGS= - -# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASCURAND choice (example: "make HASCURAND=hasNoCurand") -ifeq ($(HASCURAND),hasNoCurand) - override RNDCXXFLAGS += -DMGONGPU_HAS_NO_CURAND -else ifeq ($(HASCURAND),hasCurand) - override RNDLIBFLAGS += -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
-else - $(error Unknown HASCURAND='$(HASCURAND)': only 'hasCurand' and 'hasNoCurand' are supported) -endif - -# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASHIPRAND choice (example: "make HASHIPRAND=hasNoHiprand") -ifeq ($(HASHIPRAND),hasNoHiprand) - override RNDCXXFLAGS += -DMGONGPU_HAS_NO_HIPRAND -else ifeq ($(HASHIPRAND),hasHiprand) - override RNDLIBFLAGS += -L$(HIP_HOME)/lib/ -lhiprand -else ifneq ($(HASHIPRAND),hasHiprand) - $(error Unknown HASHIPRAND='$(HASHIPRAND)': only 'hasHiprand' and 'hasNoHiprand' are supported) -endif - -#$(info RNDCXXFLAGS=$(RNDCXXFLAGS)) -#$(info HASHIPRAND=$(HASHIPRAND)) - -#------------------------------------------------------------------------------- - -#=== Configure build directories and build lockfiles === - -# Build directory "short" tag (defines target and path to the optional build directory) -# (Rationale: keep directory names shorter, e.g. do not include random number generator choice) -override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) - -# Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) -# (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) -override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASHIPRAND) - -# Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 -ifeq ($(USEBUILDDIR),1) - override BUILDDIR = build.$(DIRTAG) - override LIBDIR = ../lib/$(BUILDDIR) - override LIBDIRRPATH = '$$ORIGIN/../$(LIBDIR)' - $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is set = 1)) -else - override BUILDDIR = . 
- override LIBDIR = ../lib - override LIBDIRRPATH = '$$ORIGIN/$(LIBDIR)' - $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is not set)) -endif -###override INCDIR = ../../include -###$(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG)) - -# On Linux, set rpath to LIBDIR to make it unnecessary to use LD_LIBRARY_PATH -# Use relative paths with respect to the executables or shared libraries ($ORIGIN on Linux) -# On Darwin, building libraries with absolute paths in LIBDIR makes this unnecessary -ifeq ($(UNAME_S),Darwin) - override CXXLIBFLAGSRPATH = - override CULIBFLAGSRPATH = - override CXXLIBFLAGSRPATH2 = - override CULIBFLAGSRPATH2 = -else - # RPATH to cuda/cpp libs when linking executables - override CXXLIBFLAGSRPATH = -Wl,-rpath=$(LIBDIRRPATH) - override CULIBFLAGSRPATH = -Xlinker -rpath=$(LIBDIRRPATH) - # RPATH to common lib when linking cuda/cpp libs - override CXXLIBFLAGSRPATH2 = -Wl,-rpath='$$ORIGIN' - override CULIBFLAGSRPATH2 = -Xlinker -rpath='$$ORIGIN' -endif - -# Setting LD_LIBRARY_PATH or DYLD_LIBRARY_PATH in the RUNTIME is no longer necessary (neither on Linux nor on Mac) -override RUNTIME = - -#=============================================================================== -#=== Makefile TARGETS and build rules below -#=============================================================================== - - -.PHONY: all $(DIRS) - -DIRS := $(wildcard P*) - - -# Construct the library paths -cxx_proclibs := $(shell for dir in $(DIRS); do basename $$dir | awk -F_ '{print "mg5amc_"$$(NF-1)"_"$$NF"_cpp"}'; done) -rwgtlib := $(addprefix ,$(addsuffix /librwgt.so,$(DIRS))) - -cxx_rwgt=$(BUILDDIR)/rwgt.exe -ifneq ($(GPUCC),) -cu_rwgt=$(BUILDDIR)/grwgt.exe -grwgtlib := $(addprefix $(DIRS)/,libgrwgt.so) -cu_proclibs := $(shell for dir in $(DIRS); do basename $$dir | awk -F_ '{print "mg5amc_"$$(NF-1)"_"$$NF"_cuda"}'; done) -else -cu_rwgt= -grwgtlib= -cu_proclibs= -endif -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so 
$(cu_rwgt) $(cxx_rwgt) - -# Target (and build options): debug -MAKEDEBUG= -debug: OPTFLAGS = -g -O0 -debug: CUOPTFLAGS = -G -debug: MAKEDEBUG := debug -debug: all.$(TAG) - -# Target: tag-specific build lockfiles -override oldtagsb=`if [ -d $(BUILDDIR) ]; then find $(BUILDDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)' -exec echo $(shell pwd)/{} \; ; fi` -$(BUILDDIR)/.build.$(TAG): - @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi - @touch $(BUILDDIR)/.build.$(TAG) - -# Generic target and build rules: objects from CUDA or HIP compilation -# NB: CCBUILDRULEFLAGS includes "-x cu" for nvcc and "-x hip" for hipcc (#810) -ifneq ($(GPUCC),) -$(BUILDDIR)/%%.o : %%.cu *.h ../src/*.h $(BUILDDIR)/.build.$(TAG) - @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - $(GPUCC) $(CPPFLAGS) $(GPUFLAGS) $(CUBUILDRULEFLAGS) $< -o $@ - -$(BUILDDIR)/%%_cu.o : %%.cc *.h ../src/*.h $(BUILDDIR)/.build.$(TAG) - @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - $(GPUCC) $(CPPFLAGS) $(GPUFLAGS) $(CCBUILDRULEFLAGS) $< -o $@ -endif - -# Generic target and build rules: objects from C++ compilation -# (NB do not include CUINC here! add it only for NVTX or curand #679) -$(BUILDDIR)/%%.o : %%.cc *.h ../src/*.h $(BUILDDIR)/.build.$(TAG) - @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -fPIC -c $< -o $@ - -# Apply special build flags only to CrossSectionKernel[_cu].o (no fast math, see #117 and #516) -# Added edgecase for HIP compilation -ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -ifeq ($(findstring nvcc,$(GPUCC)),nvcc) - $(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -Xcompiler -fno-fast-math -else - $(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -fno-fast-math -endif -endif - -# # Apply special build flags only to check_sa[_cu].o (NVTX in timermap.h, #679) -# $(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) -# $(BUILDDIR)/check_sa_cu.o: CXXFLAGS += $(USE_NVTX) $(CUINC) - -# # Apply special build flags only to check_sa[_cu].o and (Cu|Hip)randRandomNumberKernel[_cu].o -# $(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) -# $(BUILDDIR)/check_sa_cu.o: CUFLAGS += $(RNDCXXFLAGS) -# $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) -# $(BUILDDIR)/CurandRandomNumberKernel_cu.o: CUFLAGS += $(RNDCXXFLAGS) -# $(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) -# $(BUILDDIR)/HiprandRandomNumberKernel_cu.o: CUFLAGS += $(RNDCXXFLAGS) -# ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -# $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) -# endif -# ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -# $(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIPINC) -# endif - -# Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... 
instead" in GPUCC with icx2023 (#592) -ifneq ($(shell $(CXX) --version | egrep '^(Intel)'),) -ifneq ($(GPUCC),) -GPUFLAGS += -Wno-deprecated-builtins -endif -endif - -# Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) -# This patch does remove the warning, but I prefer to keep it disabled for the moment... -###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option -###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -Xcompiler -Wno-overriding-t-option -###endif -###endif - -#### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto - -#### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) - -#------------------------------------------------------------------------------- - -# Target (and build rules): common (src) library -commonlib : $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so - -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../src/*.h ../src/*.cc $(BUILDDIR)/.build.$(TAG) - $(MAKE) -C ../src $(MAKEDEBUG) -f $(CUDACPP_SRC_MAKEFILE) - -#------------------------------------------------------------------------------- - -#processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') -###$(info processid_short=$(processid_short)) - -#MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -#cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -#cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o - -#ifneq ($(GPUCC),) -#MG5AMC_CULIB = mg5amc_$(processid_short)_cuda -#cu_objects_lib=$(BUILDDIR)/CPPProcess_cu.o $(BUILDDIR)/MatrixElementKernels_cu.o $(BUILDDIR)/BridgeKernels_cu.o $(BUILDDIR)/CrossSectionKernels_cu.o -#cu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cu.o 
$(BUILDDIR)/RamboSamplingKernels_cu.o -#endif - -# Target (and build rules): C++ and CUDA shared libraries -#$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -#$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o -#$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so -# $(CXX) -shared -o $@ $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) - -ifneq ($(GPUCC),) -#$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o -#$(LIBDIR)/lib$(MG5AMC_CULIB).so: cu_objects_lib += $(BUILDDIR)/fbridge_cu.o -$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so - $(GPUCC) --shared -o $@ $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# Bypass std::filesystem completely to ease portability on LUMI #803 -ifneq ($(findstring hipcc,$(GPUCC)),) - $(GPUCC) --shared -o $@ $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs -else - $(GPUCC) --shared -o $@ $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -endif -endif - -#------------------------------------------------------------------------------- - -# Target (and build rules): Fortran include files -###$(INCDIR)/%%.inc : ../%%.inc -### @if [ ! 
-d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi -### \cp $< $@ - -#------------------------------------------------------------------------------- - -#HERE LOOP MAKE OVER P DIRECTORIES AND ADD RWGT_RUNNER_LIBS -# Ensure each librwgt.a depends on its directory being built -$(rwgtlib): - @$(MAKE) -C $(@D) VARIABLE=true - -# Target (and build rules): C++ and CUDA standalone executables -$(cxx_rwgt): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_rwgt): $(BUILDDIR)/rwgt_driver.o $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(rwgtlib) - $(CXX) -o $@ $(BUILDDIR)/rwgt_driver.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -l$(cxx_proclibs) $(rwgtlib) - -ifneq ($(GPUCC),) -ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(cu_rwgt): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(cu_rwgt): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') -else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(cu_rwgt): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc -endif -$(cu_rwgt): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cu_rwgt): rwgtlibs $(BUILDDIR)/grwgt.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(DIRS) - $(GPUCC) -o $@ $(BUILDDIR)/grwgt.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) -l$(cu_proclibs) $(grwgtlib) -endif - -#------------------------------------------------------------------------------- - -# Generic target and build rules: objects from Fortran compilation -#$(BUILDDIR)/%%.o : %%.f *.inc -# @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi -# $(FC) -I. -c $< -o $@ - -# Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%%.o : %%.f *.inc -### @if [ ! 
-d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi -### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi -### $(FC) -I. -I$(INCDIR) -c $< -o $@ - -# Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc - -#ifeq ($(UNAME_S),Darwin) -#$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 -#endif -#$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -#$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) -#ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 -# $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ -#else -# $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -#endif - -# ifneq ($(GPUCC),) -# ifneq ($(shell $(CXX) --version | grep ^Intel),) -# $(fcu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -# $(fcu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') -# endif -# ifeq ($(UNAME_S),Darwin) -# $(fcu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 -# endif -# $(fcu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -# $(fcu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) -# ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 -# $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) 
-l$(MG5AMC_CULIB) $(cu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 -# else -# $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) -# endif -# endif - -#------------------------------------------------------------------------------- - -# Target (and build rules): test objects and test executable -# $(BUILDDIR)/testxxx.o: $(GTESTLIBS) -# $(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -# $(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -# $(testmain): $(BUILDDIR)/testxxx.o -# $(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions - -# ifneq ($(GPUCC),) -# $(BUILDDIR)/testxxx_cu.o: $(GTESTLIBS) -# $(BUILDDIR)/testxxx_cu.o: INCFLAGS += $(GTESTINC) -# $(BUILDDIR)/testxxx_cu.o: testxxx_cc_ref.txt -# $(testmain): $(BUILDDIR)/testxxx_cu.o -# $(testmain): cu_objects_exe += $(BUILDDIR)/testxxx_cu.o # Comment out this line to skip the CUDA test of xxx functions -# endif - -# $(BUILDDIR)/testmisc.o: $(GTESTLIBS) -# $(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -# $(testmain): $(BUILDDIR)/testmisc.o -# $(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ miscellaneous tests - -# ifneq ($(GPUCC),) -# $(BUILDDIR)/testmisc_cu.o: $(GTESTLIBS) -# $(BUILDDIR)/testmisc_cu.o: INCFLAGS += $(GTESTINC) -# $(testmain): $(BUILDDIR)/testmisc_cu.o -# $(testmain): cu_objects_exe += $(BUILDDIR)/testmisc_cu.o # Comment out this line to skip the CUDA miscellaneous tests -# endif - -# $(BUILDDIR)/runTest.o: $(GTESTLIBS) -# $(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -# $(testmain): $(BUILDDIR)/runTest.o -# $(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o - -# ifneq ($(GPUCC),) -# $(BUILDDIR)/runTest_cu.o: $(GTESTLIBS) -# $(BUILDDIR)/runTest_cu.o: INCFLAGS += $(GTESTINC) -# ifneq ($(shell $(CXX) --version | grep ^Intel),) -# $(testmain): LIBFLAGS += 
-lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -# $(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') -# else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -# $(testmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc -# endif -# $(testmain): $(BUILDDIR)/runTest_cu.o -# $(testmain): cu_objects_exe += $(BUILDDIR)/runTest_cu.o -# endif - -# $(testmain): $(GTESTLIBS) -# $(testmain): INCFLAGS += $(GTESTINC) -# $(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -lgtest_main - -# ifneq ($(OMPFLAGS),) -# ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -# $(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) -# else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -# $(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 -# ###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -# ###$(testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) -# else -# $(testmain): LIBFLAGS += -lgomp -# endif -# endif - -# # Bypass std::filesystem completely to ease portability on LUMI #803 -# #ifneq ($(findstring hipcc,$(GPUCC)),) -# #$(testmain): LIBFLAGS += -lstdc++fs -# #endif - -# ifeq ($(GPUCC),) # link only runTest.o -# $(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -# $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) -# $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -# else # link both runTest.o and runTest_cu.o -# $(testmain): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -# $(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) $(GTESTLIBS) -# ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 -# $(FC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) $(CUDATESTFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 -# else -# $(GPUCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) $(CUDATESTFLAGS) -# endif -# endif - -# # Use target gtestlibs to build only googletest -# ifneq ($(GTESTLIBS),) -# gtestlibs: $(GTESTLIBS) -# endif - -# # Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 -# $(GTESTLIBS): -# ifneq ($(shell which flock 2>/dev/null),) -# @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi -# flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) -# else -# if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi -# endif - -#------------------------------------------------------------------------------- - -# Target: build all targets in all AVX modes (each AVX mode in a separate build directory) -# Split the avxall target into five separate targets to allow parallel 'make -j avxall' builds -# (Hack: add a fbridge.inc dependency to avxall, to ensure it is only copied once for all AVX modes) -avxnone: - @echo - $(MAKE) USEBUILDDIR=1 AVX=none -f $(CUDACPP_MAKEFILE) - -avxsse4: - @echo - $(MAKE) USEBUILDDIR=1 AVX=sse4 -f $(CUDACPP_MAKEFILE) - -avxavx2: - @echo - $(MAKE) USEBUILDDIR=1 AVX=avx2 -f $(CUDACPP_MAKEFILE) - -avx512y: - @echo - $(MAKE) USEBUILDDIR=1 AVX=512y -f $(CUDACPP_MAKEFILE) - -avx512z: - @echo - $(MAKE) USEBUILDDIR=1 AVX=512z -f $(CUDACPP_MAKEFILE) - -ifeq ($(UNAME_P),ppc64le) -###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 -avxall: avxnone avxsse4 -else ifeq ($(UNAME_P),arm) -###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 -avxall: avxnone avxsse4 -else -###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 avxavx2 avx512y avx512z -avxall: avxnone avxsse4 avxavx2 avx512y avx512z -endif - -#------------------------------------------------------------------------------- - -# Target: clean the builds -.PHONY: clean - -clean: -ifeq ($(USEBUILDDIR),1) - rm -rf $(BUILDDIR) -else - rm -f $(BUILDDIR)/.build.* $(BUILDDIR)/*.o $(BUILDDIR)/*.exe - rm -f $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(LIBDIR)/lib$(MG5AMC_CULIB).so -endif - $(MAKE) -C ../src clean -f $(CUDACPP_SRC_MAKEFILE) -### rm -rf $(INCDIR) - -cleanall: - @echo - $(MAKE) USEBUILDDIR=0 clean -f $(CUDACPP_MAKEFILE) - @echo - $(MAKE) USEBUILDDIR=0 -C ../src cleanall -f $(CUDACPP_SRC_MAKEFILE) - rm -rf build.* - -# Target: clean the builds as well as the gtest installation(s) -distclean: cleanall -ifneq ($(wildcard 
$(TESTDIRCOMMON)),) - $(MAKE) -C $(TESTDIRCOMMON) clean -endif - $(MAKE) -C $(TESTDIRLOCAL) clean - -#------------------------------------------------------------------------------- - -# Target: show system and compiler information -info: - @echo "" - @uname -spn # e.g. Linux nodename.cern.ch x86_64 -ifeq ($(UNAME_S),Darwin) - @sysctl -a | grep -i brand - @sysctl -a | grep machdep.cpu | grep features || true - @sysctl -a | grep hw.physicalcpu: - @sysctl -a | grep hw.logicalcpu: -else - @cat /proc/cpuinfo | grep "model name" | sort -u - @cat /proc/cpuinfo | grep "flags" | sort -u - @cat /proc/cpuinfo | grep "cpu cores" | sort -u - @cat /proc/cpuinfo | grep "physical id" | sort -u -endif - @echo "" -ifneq ($(shell which nvidia-smi 2>/dev/null),) - nvidia-smi -L - @echo "" -endif - @echo USECCACHE=$(USECCACHE) -ifeq ($(USECCACHE),1) - ccache --version | head -1 -endif - @echo "" - @echo GPUCC=$(GPUCC) -ifneq ($(GPUCC),) - $(GPUCC) --version -endif - @echo "" - @echo CXX=$(CXX) -ifneq ($(shell $(CXX) --version | grep ^clang),) - @echo $(CXX) -v - @$(CXX) -v |& egrep -v '(Found|multilib)' - @readelf -p .comment `$(CXX) -print-libgcc-file-name` |& grep 'GCC: (GNU)' | grep -v Warning | sort -u | awk '{print "GCC toolchain:",$$5}' -else - $(CXX) --version -endif - @echo "" - @echo FC=$(FC) - $(FC) --version - -#------------------------------------------------------------------------------- - -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] 
-ifneq ($(GPUCC),) -check: runTest cmpFcheck cmpFGcheck -else -check: runTest cmpFcheck -endif - -# Target: runTest (run the C++ test executable runTest.exe) -runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe - -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) -runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 - -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) -runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone executable - with CUDA MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 - -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) -cmpFcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA and Fortran with CUDA MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/CUDA) = $${me1}"; echo "Avg ME (F77/CUDA) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/CUDA) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/CUDA) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) -memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 - -#------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk deleted file mode 100644 index 80fbe5b8e7..0000000000 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_runner.mk +++ /dev/null @@ -1,1051 +0,0 @@ -# Copyright (C) 2020-2023 CERN and UCLouvain. -# Licensed under the GNU Lesser General Public License (version 3 or later). -# Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. -# Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. - -#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) -#=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts -#=== NB: use 'override' to ensure that the value can not be modified from the outside -override CUDACPP_MAKEFILE := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -###$(info CUDACPP_MAKEFILE='$(CUDACPP_MAKEFILE)') - -#=== NB: different names (e.g. cudacpp.mk and cudacpp_src.mk) are used in the Subprocess and src directories -override CUDACPP_SRC_MAKEFILE = cudacpp_src.mk - -#------------------------------------------------------------------------------- - -#=== Use bash in the Makefile (https://www.gnu.org/software/make/manual/html_node/Choosing-the-Shell.html) - -SHELL := /bin/bash - -#------------------------------------------------------------------------------- - -#=== Detect O/S and architecture (assuming uname is available, https://en.wikipedia.org/wiki/Uname) - -# Detect O/S kernel (Linux, Darwin...) -UNAME_S := $(shell uname -s) -###$(info UNAME_S='$(UNAME_S)') - -# Detect architecture (x86_64, ppc64le...) 
-UNAME_P := $(shell uname -p) -###$(info UNAME_P='$(UNAME_P)') - -#------------------------------------------------------------------------------- - -#=== Include the common MG5aMC Makefile options - -# OM: this is crucial for MG5aMC flag consistency/documentation -# AV: temporarely comment this out because it breaks cudacpp builds -ifneq ($(wildcard ../../Source/make_opts),) -include ../../Source/make_opts -endif - -#------------------------------------------------------------------------------- - -#=== Configure common compiler flags for C++ and CUDA/HIP - -INCFLAGS = -I. -OPTFLAGS = -O3 # this ends up in GPUFLAGS too (should it?), cannot add -Ofast or -ffast-math here - -# Dependency on src directory -MG5AMC_COMMONLIB = mg5amc_common -LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -INCFLAGS += -I../../src - -# Compiler-specific googletest build directory (#125 and #738) -ifneq ($(shell $(CXX) --version | grep '^Intel(R) oneAPI DPC++/C++ Compiler'),) -override CXXNAME = icpx$(shell $(CXX) --version | head -1 | cut -d' ' -f5) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -override CXXNAME = clang$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else ifneq ($(shell $(CXX) --version | grep '^g++ (GCC)'),) -override CXXNAME = gcc$(shell $(CXX) --version | head -1 | cut -d' ' -f3) -else -override CXXNAME = unknown -endif -###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) -export CXXNAMESUFFIX - -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = 
../../../../../test -TESTDIRLOCAL = ../../test -ifneq ($(wildcard $(GTEST_ROOT)),) -TESTDIR = -else ifneq ($(LOCALGTEST),) -TESTDIR=$(TESTDIRLOCAL) -GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../../epochX/cudacpp/CODEGEN),) -TESTDIR = $(TESTDIRCOMMON) -GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else -TESTDIR = -endif -ifneq ($(GTEST_ROOT),) -GTESTLIBDIR = $(GTEST_ROOT)/lib64/ -GTESTLIBS = $(GTESTLIBDIR)/libgtest.a $(GTESTLIBDIR)/libgtest_main.a -GTESTINC = -I$(GTEST_ROOT)/include -else -GTESTLIBDIR = -GTESTLIBS = -GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - -#------------------------------------------------------------------------------- - -#=== Configure the C++ compiler - -CXXFLAGS = $(OPTFLAGS) -std=c++17 $(INCFLAGS) -Wall -Wshadow -Wextra -ifeq ($(shell $(CXX) --version | grep ^nvc++),) -CXXFLAGS += -ffast-math # see issue #117 -endif -###CXXFLAGS+= -Ofast # performance is not different from --fast-math -###CXXFLAGS+= -g # FOR DEBUGGING ONLY - -# Optionally add debug flags to display the full list of flags (eg on Darwin) -###CXXFLAGS+= -v - -# Note: AR, CXX and FC are implicitly defined if not set externally -# See https://www.gnu.org/software/make/manual/html_node/Implicit-Variables.html - -# Add -mmacosx-version-min=11.3 to avoid "ld: warning: object file was built for newer macOS version than being linked" -ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -CXXFLAGS += -mmacosx-version-min=11.3 -endif - -#------------------------------------------------------------------------------- - -#=== Configure the GPU compiler (CUDA or HIP) - -# FIXME! (AV 24.01.2024) -# In the current implementation (without separate builds for C++ and CUDA/HIP), we first check for cudacc and hipcc in CUDA_HOME and HIP_HOME. -# If CUDA_HOME or HIP_HOME are not set, try to determine them from the path to cudacc and hipcc. 
-# While convoluted, this is currently necessary to allow disabling CUDA/HIP builds by setting CUDA_HOME or HIP_HOME to invalid paths. -# This will (probably?) be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775). - -# If CXX is not a single word (example "clang++ --gcc-toolchain...") then disable CUDA builds (issue #505) -# This is because it is impossible to pass this to "GPUFLAGS += -ccbin " below -ifneq ($(words $(subst ccache ,,$(CXX))),1) # allow at most "CXX=ccache " from outside - $(warning CUDA builds are not supported for multi-word CXX "$(CXX)") - override CUDA_HOME=disabled -endif - -# If CUDA_HOME is not set, try to set it from the path to nvcc -ifndef CUDA_HOME - CUDA_HOME = $(patsubst %%/bin/nvcc,%%,$(shell which nvcc 2>/dev/null)) - $(warning CUDA_HOME was not set: using "$(CUDA_HOME)") -endif - -# If HIP_HOME is not set, try to set it from the path to hipcc -ifndef HIP_HOME - HIP_HOME = $(patsubst %%/bin/hipcc,%%,$(shell which hipcc 2>/dev/null)) - $(warning HIP_HOME was not set: using "$(HIP_HOME)") -endif - -# FIXME! (AV 24.01.2024) -# In the current implementation (without separate builds for C++ and CUDA/HIP), -# builds are performed for HIP only if CUDA is not found in the path. -# If both CUDA and HIP are installed, HIP builds can be triggered by unsetting CUDA_HOME. -# This will be fixed when separate C++ and CUDA/HIP builds are implemented (PR #775). - -#--- Option 1: CUDA exists -> use CUDA - -# Set GPUCC as $(CUDA_HOME)/bin/nvcc if it exists -ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) - - GPUCC = $(CUDA_HOME)/bin/nvcc - USE_NVTX ?=-DUSE_NVTX - # See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html - # See https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/ - # Default: use compute capability 70 for V100 (CERN lxbatch, CERN itscrd, Juwels Cluster). - # Embed device code for 70, and PTX for 70+. 
- # Export MADGRAPH_CUDA_ARCHITECTURE (comma-separated list) to use another value or list of values (see #533). - # Examples: use 60 for P100 (Piz Daint), 80 for A100 (Juwels Booster, NVidia raplab/Curiosity). - MADGRAPH_CUDA_ARCHITECTURE ?= 70 - ###CUARCHFLAGS = -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=compute_$(MADGRAPH_CUDA_ARCHITECTURE) -gencode arch=compute_$(MADGRAPH_CUDA_ARCHITECTURE),code=sm_$(MADGRAPH_CUDA_ARCHITECTURE) # Older implementation (AV): go back to this one for multi-GPU support #533 - ###CUARCHFLAGS = --gpu-architecture=compute_$(MADGRAPH_CUDA_ARCHITECTURE) --gpu-code=sm_$(MADGRAPH_CUDA_ARCHITECTURE),compute_$(MADGRAPH_CUDA_ARCHITECTURE) # Newer implementation (SH): cannot use this as-is for multi-GPU support #533 - comma:=, - CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) - CUINC = -I$(CUDA_HOME)/include/ - CUOPTFLAGS = -lineinfo - ###GPUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math - GPUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math - ###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow - ###GPUCC_VERSION = $(shell $(GPUCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1) - GPUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h - # Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12) - ###GPUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12) - ###GPUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12) - ###GPUFLAGS+= --maxrregcount 96 # degrades throughput: 4.1E8 (16384 32 12) up to 4.5E8 (65536 128 12) - ###GPUFLAGS+= --maxrregcount 64 # degrades throughput: 
1.7E8 (16384 32 12) flat at 1.7E8 (65536 128 12) - CUBUILDRULEFLAGS = -Xcompiler -fPIC -c - CCBUILDRULEFLAGS = -Xcompiler -fPIC -c -x cu - CUDATESTFLAGS = -lcuda - - # Set the host C++ compiler for GPUCC via "-ccbin " - # (NB issue #505: this must be a single word, "clang++ --gcc-toolchain..." is not supported) - GPUFLAGS += -ccbin $(shell which $(subst ccache ,,$(CXX))) - - # Allow newer (unsupported) C++ compilers with older versions of CUDA if ALLOW_UNSUPPORTED_COMPILER_IN_CUDA is set (#504) - ifneq ($(origin ALLOW_UNSUPPORTED_COMPILER_IN_CUDA),undefined) - GPUFLAGS += -allow-unsupported-compiler - endif - -else ifneq ($(origin REQUIRE_CUDA),undefined) - - # If REQUIRE_CUDA is set but no cuda is found, stop here (e.g. for CI tests on GPU #443) - $(error No cuda installation found (set CUDA_HOME or make GPUCC visible in PATH)) - -#--- Option 2: CUDA does not exist, HIP exists -> use HIP - -# Set GPUCC as $(HIP_HOME)/bin/hipcc if it exists -else ifneq ($(wildcard $(HIP_HOME)/bin/hipcc),) - - GPUCC = $(HIP_HOME)/bin/hipcc - #USE_NVTX ?=-DUSE_NVTX # should maybe find something equivalent to this in HIP? - HIPARCHFLAGS = -target x86_64-linux-gnu --offload-arch=gfx90a - HIPINC = -I$(HIP_HOME)/include/ - # Note: -DHIP_FAST_MATH is equivalent to -use_fast_math in HIP - # (but only for single precision line 208: https://rocm-developer-tools.github.io/HIP/hcc__detail_2math__functions_8h_source.html) - # Note: CUOPTFLAGS should not be used for HIP, it had been added here but was then removed (#808) - GPUFLAGS = $(OPTFLAGS) $(INCFLAGS) $(HIPINC) $(HIPARCHFLAGS) -DHIP_FAST_MATH -DHIP_PLATFORM=amd -fPIC - ###GPUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow - GPUFLAGS += -std=c++17 - ###GPUFLAGS+= --maxrregcount 255 # (AV: is this option valid on HIP and meaningful on AMD GPUs?) - CUBUILDRULEFLAGS = -fPIC -c - CCBUILDRULEFLAGS = -fPIC -c -x hip - -else ifneq ($(origin REQUIRE_HIP),undefined) - - # If REQUIRE_HIP is set but no HIP is found, stop here (e.g. 
for CI tests on GPU #443) - $(error No hip installation found (set HIP_HOME or make GPUCC visible in PATH)) - -#--- Option 3: CUDA does not exist, HIP does not exist -> switch off both CUDA and HIP - -else - - # No cudacc and no hipcc: switch CUDA and HIP compilation off and go to common random numbers in C++ - $(warning CUDA_HOME is not set or is invalid: export CUDA_HOME to compile with cuda) - $(warning HIP_HOME is not set or is invalid: export HIP_HOME to compile with hip) - override GPUCC= - override USE_NVTX= - override CUINC= - override HIPINC= - -endif - -# Export GPUCC (so that it can also be used in cudacpp_src.mk?) -export GPUCC -export GPUFLAGS - -#------------------------------------------------------------------------------- - -#=== Configure ccache for C++ and CUDA/HIP builds - -# Enable ccache if USECCACHE=1 -ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) - override CXX:=ccache $(CXX) -endif -#ifeq ($(USECCACHE)$(shell echo $(AR) | grep ccache),1) -# override AR:=ccache $(AR) -#endif -ifneq ($(GPUCC),) - ifeq ($(USECCACHE)$(shell echo $(GPUCC) | grep ccache),1) - override GPUCC:=ccache $(GPUCC) - endif -endif - -#------------------------------------------------------------------------------- - -#=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP - -# PowerPC-specific CXX compiler flags (being reviewed) -ifeq ($(UNAME_P),ppc64le) - CXXFLAGS+= -mcpu=power9 -mtune=power9 # gains ~2-3%% both for none and sse4 - # Throughput references without the extra flags below: none=1.41-1.42E6, sse4=2.15-2.19E6 - ###CXXFLAGS+= -DNO_WARN_X86_INTRINSICS # no change - ###CXXFLAGS+= -fpeel-loops # no change - ###CXXFLAGS+= -funroll-loops # gains ~1%% for none, loses ~1%% for sse4 - ###CXXFLAGS+= -ftree-vectorize # no change - ###CXXFLAGS+= -flto # would increase to none=4.08-4.12E6, sse4=4.99-5.03E6! -else - ###CXXFLAGS+= -flto # also on Intel this would increase throughputs by a factor 2 to 4... 
- ######CXXFLAGS+= -fno-semantic-interposition # no benefit (neither alone, nor combined with -flto) -endif - -# PowerPC-specific CUDA/HIP compiler flags (to be reviewed!) -ifeq ($(UNAME_P),ppc64le) - GPUFLAGS+= -Xcompiler -mno-float128 -endif - -#------------------------------------------------------------------------------- - -#=== Configure defaults and check if user-defined choices exist for OMPFLAGS, AVX, FPTYPE, HELINL, HRDCOD - -# Set the default OMPFLAGS choice -ifneq ($(findstring hipcc,$(GPUCC)),) -override OMPFLAGS = # disable OpenMP MT when using hipcc #802 -else ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT on Intel (was ok without GPUCC but not ok with GPUCC before #578) -else ifneq ($(shell $(CXX) --version | egrep '^(clang)'),) -override OMPFLAGS = -fopenmp -###override OMPFLAGS = # disable OpenMP MT on clang (was not ok without or with nvcc before #578) -###else ifneq ($(shell $(CXX) --version | egrep '^(Apple clang)'),) # AV for Mac (Apple clang compiler) -else ifeq ($(UNAME_S),Darwin) # OM for Mac (any compiler) -override OMPFLAGS = # AV disable OpenMP MT on Apple clang (builds fail in the CI #578) -###override OMPFLAGS = -fopenmp # OM reenable OpenMP MT on Apple clang? 
(AV Oct 2023: this still fails in the CI) -else -override OMPFLAGS = -fopenmp # enable OpenMP MT by default on all other platforms -###override OMPFLAGS = # disable OpenMP MT on all other platforms (default before #575) -endif - -# Set the default AVX (vectorization) choice -ifeq ($(AVX),) - ifeq ($(UNAME_P),ppc64le) - ###override AVX = none - override AVX = sse4 - else ifeq ($(UNAME_P),arm) - ###override AVX = none - override AVX = sse4 - else ifeq ($(wildcard /proc/cpuinfo),) - override AVX = none - $(warning Using AVX='$(AVX)' because host SIMD features cannot be read from /proc/cpuinfo) - else ifeq ($(shell grep -m1 -c avx512vl /proc/cpuinfo)$(shell $(CXX) --version | grep ^clang),1) - override AVX = 512y - ###$(info Using AVX='$(AVX)' as no user input exists) - else - override AVX = avx2 - ifneq ($(shell grep -m1 -c avx512vl /proc/cpuinfo),1) - $(warning Using AVX='$(AVX)' because host does not support avx512vl) - else - $(warning Using AVX='$(AVX)' because this is faster than avx512vl for clang) - endif - endif -else - ###$(info Using AVX='$(AVX)' according to user input) -endif - -# Set the default FPTYPE (floating point type) choice -ifeq ($(FPTYPE),) - override FPTYPE = d -endif - -# Set the default HELINL (inline helicities?) choice -ifeq ($(HELINL),) - override HELINL = 0 -endif - -# Set the default HRDCOD (hardcode cIPD physics parameters?) 
choice -ifeq ($(HRDCOD),) - override HRDCOD = 0 -endif - -# Export AVX, FPTYPE, HELINL, HRDCOD, OMPFLAGS so that it is not necessary to pass them to the src Makefile too -export AVX -export FPTYPE -export HELINL -export HRDCOD -export OMPFLAGS - -#------------------------------------------------------------------------------- - -#=== Configure defaults and check if user-defined choices exist for RNDGEN (legacy!), HASCURAND, HASHIPRAND - -# If the legacy RNDGEN exists, this take precedence over any HASCURAND choice (but a warning is printed out) -###$(info RNDGEN=$(RNDGEN)) -ifneq ($(RNDGEN),) - $(warning Environment variable RNDGEN is no longer supported, please use HASCURAND instead!) - ifeq ($(RNDGEN),hasCurand) - override HASCURAND = $(RNDGEN) - else ifeq ($(RNDGEN),hasNoCurand) - override HASCURAND = $(RNDGEN) - else ifneq ($(RNDGEN),hasNoCurand) - $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported - but use HASCURAND instead!) - endif -endif - -# Set the default HASCURAND (curand random number generator) choice, if no prior choice exists for HASCURAND -# (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) -ifeq ($(HASCURAND),) - ifeq ($(GPUCC),) # CPU-only build - override HASCURAND = hasNoCurand - else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build - override HASCURAND = hasCurand - else # non-Nvidia GPU build - override HASCURAND = hasNoCurand - endif -endif - -# Set the default HASHIPRAND (hiprand random number generator) choice, if no prior choice exists for HASHIPRAND -# (NB: allow HASHIPRAND=hasHiprand even if $(GPUCC) does not point to hipcc: assume HIP_HOME was defined correctly...) 
-ifeq ($(HASHIPRAND),) - ifeq ($(GPUCC),) # CPU-only build - override HASHIPRAND = hasNoHiprand - else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build - override HASHIPRAND = hasHiprand - else # non-AMD GPU build - override HASHIPRAND = hasNoHiprand - endif -endif - -# Export HASCURAND, HASHIPRAND so that it is not necessary to pass them to the src Makefile too -# (NB: these variables in cudacpp_src.mk are only used to define the build tag, they are NOT needed for RNDCXXFLAGS or RNDLIBFLAGS) -export HASCURAND -export HASHIPRAND - -#------------------------------------------------------------------------------- - -#=== Set the CUDA/HIP/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD - -# Set the build flags appropriate to OMPFLAGS -$(info OMPFLAGS=$(OMPFLAGS)) -CXXFLAGS += $(OMPFLAGS) - -# Set the build flags appropriate to each AVX choice (example: "make AVX=none") -# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro] -# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476] -$(info AVX=$(AVX)) -ifeq ($(UNAME_P),ppc64le) - ifeq ($(AVX),sse4) - override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers) - else ifneq ($(AVX),none) - $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on PowerPC for the moment) - endif -else ifeq ($(UNAME_P),arm) - ifeq ($(AVX),sse4) - override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers) - else ifneq ($(AVX),none) - $(error Unknown AVX='$(AVX)': only 'none' and 'sse4' are supported on ARM for the moment) - endif -else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 - ifeq ($(AVX),none) - override AVXFLAGS = -mno-sse3 # no SIMD - else ifeq ($(AVX),sse4) - override AVXFLAGS = -mno-avx # SSE4.2 with 128 width (xmm registers) - else ifeq ($(AVX),avx2) - override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] - else ifeq 
($(AVX),512y) - override AVXFLAGS = -march=skylake -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] - else ifeq ($(AVX),512z) - override AVXFLAGS = -march=skylake -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) - else - $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) - endif -else - ifeq ($(AVX),none) - override AVXFLAGS = -march=x86-64 # no SIMD (see #588) - else ifeq ($(AVX),sse4) - override AVXFLAGS = -march=nehalem # SSE4.2 with 128 width (xmm registers) - else ifeq ($(AVX),avx2) - override AVXFLAGS = -march=haswell # AVX2 with 256 width (ymm registers) [DEFAULT for clang] - else ifeq ($(AVX),512y) - override AVXFLAGS = -march=skylake-avx512 -mprefer-vector-width=256 # AVX512 with 256 width (ymm registers) [DEFAULT for gcc] - else ifeq ($(AVX),512z) - override AVXFLAGS = -march=skylake-avx512 -DMGONGPU_PVW512 # AVX512 with 512 width (zmm registers) - else - $(error Unknown AVX='$(AVX)': only 'none', 'sse4', 'avx2', '512y' and '512z' are supported) - endif -endif -# For the moment, use AVXFLAGS everywhere: eventually, use them only in encapsulated implementations? 
-CXXFLAGS+= $(AVXFLAGS) - -# Set the build flags appropriate to each FPTYPE choice (example: "make FPTYPE=f") -$(info FPTYPE=$(FPTYPE)) -ifeq ($(FPTYPE),d) - CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE - GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE -else ifeq ($(FPTYPE),f) - CXXFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT - GPUFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT -else ifeq ($(FPTYPE),m) - CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT - GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT -else - $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported) -endif - -# Set the build flags appropriate to each HELINL choice (example: "make HELINL=1") -$(info HELINL=$(HELINL)) -ifeq ($(HELINL),1) - CXXFLAGS += -DMGONGPU_INLINE_HELAMPS - GPUFLAGS += -DMGONGPU_INLINE_HELAMPS -else ifneq ($(HELINL),0) - $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported) -endif - -# Set the build flags appropriate to each HRDCOD choice (example: "make HRDCOD=1") -$(info HRDCOD=$(HRDCOD)) -ifeq ($(HRDCOD),1) - CXXFLAGS += -DMGONGPU_HARDCODE_PARAM - GPUFLAGS += -DMGONGPU_HARDCODE_PARAM -else ifneq ($(HRDCOD),0) - $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) -endif - - -#=== Set the CUDA/HIP/C++ compiler and linker flags appropriate to user-defined choices of HASCURAND, HASHIPRAND - -$(info HASCURAND=$(HASCURAND)) -$(info HASHIPRAND=$(HASHIPRAND)) -override RNDCXXFLAGS= -override RNDLIBFLAGS= - -# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASCURAND choice (example: "make HASCURAND=hasNoCurand") -ifeq ($(HASCURAND),hasNoCurand) - override RNDCXXFLAGS += -DMGONGPU_HAS_NO_CURAND -else ifeq ($(HASCURAND),hasCurand) - override RNDLIBFLAGS += -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
-else - $(error Unknown HASCURAND='$(HASCURAND)': only 'hasCurand' and 'hasNoCurand' are supported) -endif - -# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASHIPRAND choice (example: "make HASHIPRAND=hasNoHiprand") -ifeq ($(HASHIPRAND),hasNoHiprand) - override RNDCXXFLAGS += -DMGONGPU_HAS_NO_HIPRAND -else ifeq ($(HASHIPRAND),hasHiprand) - override RNDLIBFLAGS += -L$(HIP_HOME)/lib/ -lhiprand -else ifneq ($(HASHIPRAND),hasHiprand) - $(error Unknown HASHIPRAND='$(HASHIPRAND)': only 'hasHiprand' and 'hasNoHiprand' are supported) -endif - -#$(info RNDCXXFLAGS=$(RNDCXXFLAGS)) -#$(info HASHIPRAND=$(HASHIPRAND)) - -#------------------------------------------------------------------------------- - -#=== Configure build directories and build lockfiles === - -# Build directory "short" tag (defines target and path to the optional build directory) -# (Rationale: keep directory names shorter, e.g. do not include random number generator choice) -override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) - -# Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) -# (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) -override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASHIPRAND) - -# Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 -ifeq ($(USEBUILDDIR),1) - override BUILDDIR = build.$(DIRTAG) - override LIBDIR = ../../lib/$(BUILDDIR) - override LIBDIRRPATH = '$$ORIGIN/../$(LIBDIR)' - $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is set = 1)) -else - override BUILDDIR = . 
- override LIBDIR = ../../lib - override LIBDIRRPATH = '$$ORIGIN/$(LIBDIR)' - $(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG) (USEBUILDDIR is not set)) -endif -###override INCDIR = ../../include -###$(info Building in BUILDDIR=$(BUILDDIR) for tag=$(TAG)) - -# On Linux, set rpath to LIBDIR to make it unnecessary to use LD_LIBRARY_PATH -# Use relative paths with respect to the executables or shared libraries ($ORIGIN on Linux) -# On Darwin, building libraries with absolute paths in LIBDIR makes this unnecessary -ifeq ($(UNAME_S),Darwin) - override CXXLIBFLAGSRPATH = - override CULIBFLAGSRPATH = - override CXXLIBFLAGSRPATH2 = - override CULIBFLAGSRPATH2 = -else - # RPATH to cuda/cpp libs when linking executables - override CXXLIBFLAGSRPATH = -Wl,-rpath=$(LIBDIRRPATH) - override CULIBFLAGSRPATH = -Xlinker -rpath=$(LIBDIRRPATH) - # RPATH to common lib when linking cuda/cpp libs - override CXXLIBFLAGSRPATH2 = -Wl,-rpath='$$ORIGIN' - override CULIBFLAGSRPATH2 = -Xlinker -rpath='$$ORIGIN' -endif - -# Setting LD_LIBRARY_PATH or DYLD_LIBRARY_PATH in the RUNTIME is no longer necessary (neither on Linux nor on Mac) -override RUNTIME = - -#=============================================================================== -#=== Makefile TARGETS and build rules below -#=============================================================================== - -cxx_main=$(BUILDDIR)/check.exe -fcxx_main=$(BUILDDIR)/fcheck.exe -cxx_rwgtlib=$(BUILDDIR)/librwgt.so - -ifneq ($(GPUCC),) -cu_main=$(BUILDDIR)/gcheck.exe -fcu_main=$(BUILDDIR)/fgcheck.exe -cu_rwgtlib=$(BUILDDIR)/libgrwgt.so -else -cu_main= -fcu_main= -cu_rwgtlib= -endif - -testmain=$(BUILDDIR)/runTest.exe - -ifneq ($(GTESTLIBS),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) $(cu_rwgtlib) $(cxx_rwgtlib) $(testmain) -else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_main) $(cxx_main) $(fcu_main) $(fcxx_main) 
$(cu_rwgtlib) $(cxx_rwgtlib) -endif - -# Target (and build options): debug -MAKEDEBUG= -debug: OPTFLAGS = -g -O0 -debug: CUOPTFLAGS = -G -debug: MAKEDEBUG := debug -debug: all.$(TAG) - -# Target: tag-specific build lockfiles -override oldtagsb=`if [ -d $(BUILDDIR) ]; then find $(BUILDDIR) -maxdepth 1 -name '.build.*' ! -name '.build.$(TAG)' -exec echo $(shell pwd)/{} \; ; fi` -$(BUILDDIR)/.build.$(TAG): - @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi - @touch $(BUILDDIR)/.build.$(TAG) - -# Generic target and build rules: objects from CUDA or HIP compilation -# NB: CCBUILDRULEFLAGS includes "-x cu" for nvcc and "-x hip" for hipcc (#810) -ifneq ($(GPUCC),) -$(BUILDDIR)/%%.o : %%.cu *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) - @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - $(GPUCC) $(CPPFLAGS) $(GPUFLAGS) $(CUBUILDRULEFLAGS) $< -o $@ - -$(BUILDDIR)/%%_cu.o : %%.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) - @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - $(GPUCC) $(CPPFLAGS) $(GPUFLAGS) $(CCBUILDRULEFLAGS) $< -o $@ -endif - -# Generic target and build rules: objects from C++ compilation -# (NB do not include CUINC here! add it only for NVTX or curand #679) -$(BUILDDIR)/%%.o : %%.cc *.h ../../src/*.h $(BUILDDIR)/.build.$(TAG) - @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - $(CXX) $(CPPFLAGS) $(CXXFLAGS) -fPIC -c $< -o $@ - -# Apply special build flags only to CrossSectionKernel[_cu].o (no fast math, see #117 and #516) -# Added edgecase for HIP compilation -ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -fno-fast-math -ifeq ($(findstring nvcc,$(GPUCC)),nvcc) - $(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -Xcompiler -fno-fast-math -else - $(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -fno-fast-math -endif -endif - -# Apply special build flags only to check_sa[_cu].o (NVTX in timermap.h, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) -$(BUILDDIR)/check_sa_cu.o: CXXFLAGS += $(USE_NVTX) $(CUINC) - -# Apply special build flags only to check_sa[_cu].o and (Cu|Hip)randRandomNumberKernel[_cu].o -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/check_sa_cu.o: CUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CurandRandomNumberKernel_cu.o: CUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/HiprandRandomNumberKernel_cu.o: CUFLAGS += $(RNDCXXFLAGS) -ifeq ($(HASCURAND),hasCurand) # curand headers, #679 -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) -endif -ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers -$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIPINC) -endif - -# Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... 
instead" in GPUCC with icx2023 (#592) -ifneq ($(shell $(CXX) --version | egrep '^(Intel)'),) -ifneq ($(GPUCC),) -GPUFLAGS += -Wno-deprecated-builtins -endif -endif - -# Avoid clang warning "overriding '-ffp-contract=fast' option with '-ffp-contract=on'" (#516) -# This patch does remove the warning, but I prefer to keep it disabled for the moment... -###ifneq ($(shell $(CXX) --version | egrep '^(clang|Apple clang|Intel)'),) -###$(BUILDDIR)/CrossSectionKernels.o: CXXFLAGS += -Wno-overriding-t-option -###ifneq ($(GPUCC),) -###$(BUILDDIR)/gCrossSectionKernels.o: GPUFLAGS += -Xcompiler -Wno-overriding-t-option -###endif -###endif - -#### Apply special build flags only to CPPProcess.o (-flto) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += -flto - -#### Apply special build flags only to CPPProcess.o (AVXFLAGS) -###$(BUILDDIR)/CPPProcess.o: CXXFLAGS += $(AVXFLAGS) - -#------------------------------------------------------------------------------- - -# Target (and build rules): common (src) library -commonlib : $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so - -$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../../src/*.h ../../src/*.cc $(BUILDDIR)/.build.$(TAG) - $(MAKE) -C ../../src $(MAKEDEBUG) -f $(CUDACPP_SRC_MAKEFILE) - -#------------------------------------------------------------------------------- - -processid_short=$(shell basename $(CURDIR) | awk -F_ '{print $$(NF-1)"_"$$NF}') -###$(info processid_short=$(processid_short)) - -MG5AMC_CXXLIB = mg5amc_$(processid_short)_cpp -cxx_objects_lib=$(BUILDDIR)/CPPProcess.o $(BUILDDIR)/MatrixElementKernels.o $(BUILDDIR)/BridgeKernels.o $(BUILDDIR)/CrossSectionKernels.o -cxx_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel.o $(BUILDDIR)/RamboSamplingKernels.o - -ifneq ($(GPUCC),) -MG5AMC_CULIB = mg5amc_$(processid_short)_cuda -cu_objects_lib=$(BUILDDIR)/CPPProcess_cu.o $(BUILDDIR)/MatrixElementKernels_cu.o $(BUILDDIR)/BridgeKernels_cu.o $(BUILDDIR)/CrossSectionKernels_cu.o -cu_objects_exe=$(BUILDDIR)/CommonRandomNumberKernel_cu.o 
$(BUILDDIR)/RamboSamplingKernels_cu.o -endif - -# Target (and build rules): C++ and CUDA shared libraries -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: cxx_objects_lib += $(BUILDDIR)/fbridge.o -$(LIBDIR)/lib$(MG5AMC_CXXLIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) - $(CXX) -shared -o $@ $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) - -ifneq ($(GPUCC),) -$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(BUILDDIR)/fbridge_cu.o -$(LIBDIR)/lib$(MG5AMC_CULIB).so: cu_objects_lib += $(BUILDDIR)/fbridge_cu.o -$(LIBDIR)/lib$(MG5AMC_CULIB).so: $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cu_objects_lib) - $(GPUCC) --shared -o $@ $(cu_objects_lib) $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -# Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -# $(GPUCC) --shared -o $@ $(cu_objects_lib) $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -lstdc++fs -#else -# $(GPUCC) --shared -o $@ $(cu_objects_lib) $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -#endif -endif - -#------------------------------------------------------------------------------- - -# Target (and build rules): Fortran include files -###$(INCDIR)/%%.inc : ../%%.inc -### @if [ ! 
-d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi -### \cp $< $@ - -#------------------------------------------------------------------------------- - -# Target (and build rules): C++ and CUDA standalone executables -$(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) - -# Target (and build rules): C++ and CUDA rwgt libraries -cxx_rwgtfiles := $(BUILDDIR)/rwgt_runner.o $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(cxx_objects_exe) -$(cxx_rwgtlib): $(cxx_rwgtfiles) $(cxx_objects_lib) - $(CXX) -shared -o $@ $(cxx_rwgtfiles) $(cxx_objects_lib) $(CXXLIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) - -ifneq ($(GPUCC),) -ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(cu_main): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(cu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') -else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(cu_main): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc -endif -$(cu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cu_main): $(BUILDDIR)/check_sa_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o - $(GPUCC) -o $@ $(BUILDDIR)/check_sa_cu.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cu.o 
$(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(RNDLIBFLAGS) -cu_rwgtfiles := $(BUILDDIR)/grwgt_runner.o $(BUILDDIR)/CurandRandomNumberKernel_cu.o $(BUILDDIR)/HiprandRandomNumberKernel_cu.o $(cu_objects_exe) -$(cu_rwgtlib): $(cu_rwgtfiles) $(cu_objects_lib) - $(GPUCC) -shared -o $@ $(cu_objects_lib) $(CULIBFLAGSRPATH2) -L$(LIBDIR) -l$(MG5AMC_COMMONLIB) -endif - -#------------------------------------------------------------------------------- - -# Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%%.o : %%.f *.inc - @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - $(FC) -I. -c $< -o $@ - -# Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%%.o : %%.f *.inc -### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi -### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi -### $(FC) -I. -I$(INCDIR) -c $< -o $@ - -# Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa.o : $(INCDIR)/fbridge.inc - -ifeq ($(UNAME_S),Darwin) -$(fcxx_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 -endif -$(fcxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcxx_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) -ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ -else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa.o $(OMPFLAGS) $(BUILDDIR)/fsampler.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -endif - -ifneq ($(GPUCC),) -ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(fcu_main): LIBFLAGS += -lintlc # compile with icpx and link 
with GPUCC (undefined reference to `_intel_fast_memcpy') -$(fcu_main): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') -endif -ifeq ($(UNAME_S),Darwin) -$(fcu_main): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 -endif -$(fcu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(fcu_main): $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) -ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 -else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa.o $(BUILDDIR)/fsampler_cu.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) -endif -endif - -#------------------------------------------------------------------------------- - -# Target (and build rules): test objects and test executable -$(BUILDDIR)/testxxx.o: $(GTESTLIBS) -$(BUILDDIR)/testxxx.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testxxx.o # Comment out this line to skip the C++ test of xxx functions - -ifneq ($(GPUCC),) -$(BUILDDIR)/testxxx_cu.o: $(GTESTLIBS) -$(BUILDDIR)/testxxx_cu.o: INCFLAGS += $(GTESTINC) -$(BUILDDIR)/testxxx_cu.o: testxxx_cc_ref.txt -$(testmain): $(BUILDDIR)/testxxx_cu.o -$(testmain): cu_objects_exe += $(BUILDDIR)/testxxx_cu.o # Comment out this line to skip the CUDA test of xxx functions -endif - -$(BUILDDIR)/testmisc.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/testmisc.o # Comment out this line to skip the C++ 
miscellaneous tests - -ifneq ($(GPUCC),) -$(BUILDDIR)/testmisc_cu.o: $(GTESTLIBS) -$(BUILDDIR)/testmisc_cu.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/testmisc_cu.o -$(testmain): cu_objects_exe += $(BUILDDIR)/testmisc_cu.o # Comment out this line to skip the CUDA miscellaneous tests -endif - -$(BUILDDIR)/runTest.o: $(GTESTLIBS) -$(BUILDDIR)/runTest.o: INCFLAGS += $(GTESTINC) -$(testmain): $(BUILDDIR)/runTest.o -$(testmain): cxx_objects_exe += $(BUILDDIR)/runTest.o - -ifneq ($(GPUCC),) -$(BUILDDIR)/runTest_cu.o: $(GTESTLIBS) -$(BUILDDIR)/runTest_cu.o: INCFLAGS += $(GTESTINC) -ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(testmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(testmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') -else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 -$(testmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc -endif -$(testmain): $(BUILDDIR)/runTest_cu.o -$(testmain): cu_objects_exe += $(BUILDDIR)/runTest_cu.o -endif - -$(testmain): $(GTESTLIBS) -$(testmain): INCFLAGS += $(GTESTINC) -$(testmain): LIBFLAGS += -L$(GTESTLIBDIR) -lgtest -lgtest_main - -ifneq ($(OMPFLAGS),) -ifneq ($(shell $(CXX) --version | egrep '^Intel'),) -$(testmain): LIBFLAGS += -liomp5 # see #578 (not '-qopenmp -static-intel' as in https://stackoverflow.com/questions/45909648) -else ifneq ($(shell $(CXX) --version | egrep '^clang'),) -$(testmain): LIBFLAGS += -L $(shell dirname $(shell $(CXX) -print-file-name=libc++.so)) -lomp # see #604 -###else ifneq ($(shell $(CXX) --version | egrep '^Apple clang'),) -###$(testmain): LIBFLAGS += ???? 
# OMP is not supported yet by cudacpp for Apple clang (see #578 and #604) -else -$(testmain): LIBFLAGS += -lgomp -endif -endif - -# Bypass std::filesystem completely to ease portability on LUMI #803 -#ifneq ($(findstring hipcc,$(GPUCC)),) -#$(testmain): LIBFLAGS += -lstdc++fs -#endif - -ifeq ($(GPUCC),) # link only runTest.o -$(testmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(GTESTLIBS) - $(CXX) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) -ldl -pthread $(LIBFLAGS) -else # link both runTest.o and runTest_cu.o -$(testmain): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(testmain): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) $(GTESTLIBS) -ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) $(CUDATESTFLAGS) -lstdc++ -lpthread -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 -else - $(GPUCC) -o $@ $(cxx_objects_lib) $(cxx_objects_exe) $(cu_objects_lib) $(cu_objects_exe) -ldl $(LIBFLAGS) $(CUDATESTFLAGS) -endif -endif - -# Use target gtestlibs to build only googletest -ifneq ($(GTESTLIBS),) -gtestlibs: $(GTESTLIBS) -endif - -# Use flock (Linux only, no Mac) to allow 'make -j' if googletest has not yet been downloaded https://stackoverflow.com/a/32666215 -$(GTESTLIBS): -ifneq ($(shell which flock 2>/dev/null),) - @if [ ! 
-d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - flock $(BUILDDIR)/.make_test.lock $(MAKE) -C $(TESTDIR) -else - if [ -d $(TESTDIR) ]; then $(MAKE) -C $(TESTDIR); fi -endif - -#------------------------------------------------------------------------------- - -# Target: build all targets in all AVX modes (each AVX mode in a separate build directory) -# Split the avxall target into five separate targets to allow parallel 'make -j avxall' builds -# (Hack: add a fbridge.inc dependency to avxall, to ensure it is only copied once for all AVX modes) -avxnone: - @echo - $(MAKE) USEBUILDDIR=1 AVX=none -f $(CUDACPP_MAKEFILE) - -avxsse4: - @echo - $(MAKE) USEBUILDDIR=1 AVX=sse4 -f $(CUDACPP_MAKEFILE) - -avxavx2: - @echo - $(MAKE) USEBUILDDIR=1 AVX=avx2 -f $(CUDACPP_MAKEFILE) - -avx512y: - @echo - $(MAKE) USEBUILDDIR=1 AVX=512y -f $(CUDACPP_MAKEFILE) - -avx512z: - @echo - $(MAKE) USEBUILDDIR=1 AVX=512z -f $(CUDACPP_MAKEFILE) - -ifeq ($(UNAME_P),ppc64le) -###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 -avxall: avxnone avxsse4 -else ifeq ($(UNAME_P),arm) -###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 -avxall: avxnone avxsse4 -else -###avxall: $(INCDIR)/fbridge.inc avxnone avxsse4 avxavx2 avx512y avx512z -avxall: avxnone avxsse4 avxavx2 avx512y avx512z -endif - -#------------------------------------------------------------------------------- - -# Target: clean the builds -.PHONY: clean - -clean: -ifeq ($(USEBUILDDIR),1) - rm -rf $(BUILDDIR) -else - rm -f $(BUILDDIR)/.build.* $(BUILDDIR)/*.o $(BUILDDIR)/*.exe - rm -f $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(LIBDIR)/lib$(MG5AMC_CULIB).so -endif - $(MAKE) -C ../../src clean -f $(CUDACPP_SRC_MAKEFILE) -### rm -rf $(INCDIR) - -cleanall: - @echo - $(MAKE) USEBUILDDIR=0 clean -f $(CUDACPP_MAKEFILE) - @echo - $(MAKE) USEBUILDDIR=0 -C ../../src cleanall -f $(CUDACPP_SRC_MAKEFILE) - rm -rf build.* - -# Target: clean the builds as well as the gtest installation(s) -distclean: cleanall -ifneq ($(wildcard 
$(TESTDIRCOMMON)),) - $(MAKE) -C $(TESTDIRCOMMON) clean -endif - $(MAKE) -C $(TESTDIRLOCAL) clean - -#------------------------------------------------------------------------------- - -# Target: show system and compiler information -info: - @echo "" - @uname -spn # e.g. Linux nodename.cern.ch x86_64 -ifeq ($(UNAME_S),Darwin) - @sysctl -a | grep -i brand - @sysctl -a | grep machdep.cpu | grep features || true - @sysctl -a | grep hw.physicalcpu: - @sysctl -a | grep hw.logicalcpu: -else - @cat /proc/cpuinfo | grep "model name" | sort -u - @cat /proc/cpuinfo | grep "flags" | sort -u - @cat /proc/cpuinfo | grep "cpu cores" | sort -u - @cat /proc/cpuinfo | grep "physical id" | sort -u -endif - @echo "" -ifneq ($(shell which nvidia-smi 2>/dev/null),) - nvidia-smi -L - @echo "" -endif - @echo USECCACHE=$(USECCACHE) -ifeq ($(USECCACHE),1) - ccache --version | head -1 -endif - @echo "" - @echo GPUCC=$(GPUCC) -ifneq ($(GPUCC),) - $(GPUCC) --version -endif - @echo "" - @echo CXX=$(CXX) -ifneq ($(shell $(CXX) --version | grep ^clang),) - @echo $(CXX) -v - @$(CXX) -v |& egrep -v '(Found|multilib)' - @readelf -p .comment `$(CXX) -print-libgcc-file-name` |& grep 'GCC: (GNU)' | grep -v Warning | sort -u | awk '{print "GCC toolchain:",$$5}' -else - $(CXX) --version -endif - @echo "" - @echo FC=$(FC) - $(FC) --version - -#------------------------------------------------------------------------------- - -# Target: check (run the C++ test executable) -# [NB THIS IS WHAT IS USED IN THE GITHUB CI!] 
-ifneq ($(GPUCC),) -check: runTest cmpFcheck cmpFGcheck -else -check: runTest cmpFcheck -endif - -# Target: runTest (run the C++ test executable runTest.exe) -runTest: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/runTest.exe - -# Target: runCheck (run the C++ standalone executable check.exe, with a small number of events) -runCheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/check.exe -p 2 32 2 - -# Target: runGcheck (run the CUDA standalone executable gcheck.exe, with a small number of events) -runGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/gcheck.exe -p 2 32 2 - -# Target: runFcheck (run the Fortran standalone executable - with C++ MEs - fcheck.exe, with a small number of events) -runFcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 - -# Target: runFGcheck (run the Fortran standalone executable - with CUDA MEs - fgcheck.exe, with a small number of events) -runFGcheck: all.$(TAG) - $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 - -# Target: cmpFcheck (compare ME results from the C++ and Fortran with C++ MEs standalone executables, with a small number of events) -cmpFcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/check.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/check.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/C++) = $${me1}"; echo "Avg ME (F77/C++) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/C++) returned NaN"; elif [ "$${me2}" == "" ]; then echo "ERROR! 
Fortran calculation (F77/C++) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: cmpFGcheck (compare ME results from the CUDA and Fortran with CUDA MEs standalone executables, with a small number of events) -cmpFGcheck: all.$(TAG) - @echo - @echo "$(BUILDDIR)/gcheck.exe --common -p 2 32 2" - @echo "$(BUILDDIR)/fgcheck.exe 2 32 2" - @me1=$(shell $(RUNTIME) $(BUILDDIR)/gcheck.exe --common -p 2 32 2 | grep MeanMatrix | awk '{print $$4}'); me2=$(shell $(RUNTIME) $(BUILDDIR)/fgcheck.exe 2 32 2 | grep Average | awk '{print $$4}'); echo "Avg ME (C++/CUDA) = $${me1}"; echo "Avg ME (F77/CUDA) = $${me2}"; if [ "$${me2}" == "NaN" ]; then echo "ERROR! Fortran calculation (F77/CUDA) crashed"; elif [ "$${me2}" == "" ]; then echo "ERROR! Fortran calculation (F77/CUDA) crashed"; else python3 -c "me1=$${me1}; me2=$${me2}; reldif=abs((me2-me1)/me1); print('Relative difference =', reldif); ok = reldif <= 2E-4; print ( '%%s (relative difference %%s 2E-4)' %% ( ('OK','<=') if ok else ('ERROR','>') ) ); import sys; sys.exit(0 if ok else 1)"; fi - -# Target: memcheck (run the CUDA standalone executable gcheck.exe with a small number of events through cuda-memcheck) -memcheck: all.$(TAG) - $(RUNTIME) $(CUDA_HOME)/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes $(BUILDDIR)/gcheck.exe -p 2 32 2 - -#------------------------------------------------------------------------------- From 0ba830d95e9874e281d86733349a99538aee9ecd Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 23 Sep 2024 13:00:06 +0200 Subject: [PATCH 65/76] added official 
licensing terms (LGPL3.0) and removed some unused code --- tools/REX/REX.cc | 16 ++++++++++------ tools/REX/REX.h | 16 ++++++++++------ tools/REX/rwgt_instance.cc | 38 -------------------------------------- tools/REX/rwgt_instance.h | 18 ------------------ tools/REX/teawREX.cc | 17 ++++++++++------- tools/REX/teawREX.h | 22 ++++++++++------------ 6 files changed, 40 insertions(+), 87 deletions(-) diff --git a/tools/REX/REX.cc b/tools/REX/REX.cc index dcc5dbbac3..288f591f1b 100644 --- a/tools/REX/REX.cc +++ b/tools/REX/REX.cc @@ -7,13 +7,17 @@ * \_| \_\____/\/ \/ * ***/ - -// THIS IS NOT A LICENSED RELEASE -// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD -// FROM AN IMPROPER RELEASE. - +// +// *R*apid *E*vent e*X*traction Version 0.9.0 +// REX is a C++ library for parsing and manipulating Les Houches Event-format (LHE) files. +// It is designed to fast and lightweight, in comparison to internal parsers in programs like MadGraph. +// Currently, REX is in development and may not contain all features necessary for full LHE parsing; +// particularly, it can only parse existing LHE files, rather than writing completely new ones. +// // Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. -// All rights reserved. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// All rights not expressly granted are reserved. +// #ifndef _REX_CC_ #define _REX_CC_ diff --git a/tools/REX/REX.h b/tools/REX/REX.h index cf74424cb5..ec36e1c4d5 100644 --- a/tools/REX/REX.h +++ b/tools/REX/REX.h @@ -7,13 +7,17 @@ * \_| \_\____/\/ \/ * ***/ - -// THIS IS NOT A LICENSED RELEASE -// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD -// FROM AN IMPROPER RELEASE. - +// +// *R*apid *E*vent e*X*traction Version 0.9.0 +// REX is a C++ library for parsing and manipulating Les Houches Event-format (LHE) files. +// It is designed to fast and lightweight, in comparison to internal parsers in programs like MadGraph. 
+// Currently, REX is in development and may not contain all features necessary for full LHE parsing; +// particularly, it can only parse existing LHE files, rather than writing completely new ones. +// // Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. -// All rights reserved. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// All rights not expressly granted are reserved. +// #ifndef _REX_H_ #define _REX_H_ diff --git a/tools/REX/rwgt_instance.cc b/tools/REX/rwgt_instance.cc index b22d1ee2a7..d456ffe6bc 100644 --- a/tools/REX/rwgt_instance.cc +++ b/tools/REX/rwgt_instance.cc @@ -141,44 +141,6 @@ namespace rwgt{ return evalScatAmps; } - instance::instance(){} - instance::instance( std::vector>& event){ - this->procEventInt = event; - this->process = REX::event( event ); - } - instance::instance( std::vector>& event, REX::teaw::amplitude& amp ){ - this->procEventInt = event; - this->process = REX::event( event ); - bridgeCall = amp; - } - void instance::setProc( std::vector>& event ){ - this->procEventInt = event; - this->process = REX::event( event ); - } - instance::instance( std::vector>& event){ - this->procEventStr = event; - this->process = REX::event( event ); - } - instance::instance( std::vector>& event, REX::teaw::amplitude& amp ){ - this->procEventStr = event; - this->process = REX::event( event ); - bridgeCall = amp; - } - void instance::setProc( std::vector>& event ){ - this->procEventStr = event; - this->process = REX::event( event ); - } - void instance::setAmp( REX::teaw::amplitude& amp ){ - bridgeCall = amp; - } - std::shared_ptr> instance::ampEval( std::vector& momenta, std::vector& alphaS ){ - return bridgeCall( momenta, alphaS ); - } - std::shared_ptr> instance::ampEval( std::shared_ptr> momenta, - std::shared_ptr> alphaS ){ - return bridgeCall( *momenta, *alphaS ); - } - } #endif diff --git a/tools/REX/rwgt_instance.h b/tools/REX/rwgt_instance.h index 6c8d44ca8a..6ad34568d6 100644 --- 
a/tools/REX/rwgt_instance.h +++ b/tools/REX/rwgt_instance.h @@ -60,24 +60,6 @@ namespace rwgt{ std::shared_ptr> bridgeCall( std::vector& momenta, std::vector& alphaS ); }; - struct instance{ - std::vector> procEventInt; - std::vector> procEventStr; - REX::event process; - REX::teaw::amplitude bridgeCall; - instance(); - instance( std::vector>& event); - instance( std::vector>& event, REX::teaw::amplitude& amp ); - void setProc( std::vector>& event ); - instance( std::vector>& event); - instance( std::vector>& event, REX::teaw::amplitude& amp ); - void setProc( std::vector>& event ); - void setAmp( REX::teaw::amplitude& amp ); - std::shared_ptr> ampEval( std::vector& momenta, std::vector& alphaS ); - std::shared_ptr> ampEval( std::shared_ptr> momenta, - std::shared_ptr> alphaS ); - }; - } #endif \ No newline at end of file diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc index de24f00721..cef75c3de4 100644 --- a/tools/REX/teawREX.cc +++ b/tools/REX/teawREX.cc @@ -7,13 +7,17 @@ * \__\___|\__,_| \_/\_/ \_| \_\____/\/ \/ * ***/ - -// THIS IS NOT A LICENSED RELEASE -// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD -// FROM AN IMPROPER RELEASE. - +// +// *t*ensorial *e*vent *a*daption *w*ith *REX* Version 0.9.0 +// teawREX is an extension to the REX C++ library for parsing and manipulating Les Houches Event-format (LHE) files, +// designed for leading order event reweighting based on input LHE file(s) and scattering amplitude functions. +// teawREX is in development and may not contain all features necessary for all desired features, +// and does not have documentation beyond the code itself. +// // Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. -// All rights reserved. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// All rights not expressly granted are reserved. 
+// #ifndef _TEAWREX_CC_ #define _TEAWREX_CC_ @@ -25,7 +29,6 @@ #include #include #include -//#include "REX.cc" #include "REX.h" #include "teawREX.h" diff --git a/tools/REX/teawREX.h b/tools/REX/teawREX.h index c2bb695213..2211d76184 100644 --- a/tools/REX/teawREX.h +++ b/tools/REX/teawREX.h @@ -7,13 +7,17 @@ * \__\___|\__,_| \_/\_/ \_| \_\____/\/ \/ * ***/ - -// THIS IS NOT A LICENSED RELEASE -// IF YOU SEE THIS FILE, IT HAS BEEN SPREAD -// FROM AN IMPROPER RELEASE. - +// +// *t*ensorial *e*vent *a*daption *w*ith *REX* Version 0.9.0 +// teawREX is an extension to the REX C++ library for parsing and manipulating Les Houches Event-format (LHE) files, +// designed for leading order event reweighting based on input LHE file(s) and scattering amplitude functions. +// teawREX is in development and may not contain all features necessary for all desired features, +// and does not have documentation beyond the code itself. +// // Copyright © 2023-2024 CERN, CERN Author Zenny Wettersten. -// All rights reserved. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// All rights not expressly granted are reserved. 
+// #ifndef _TEAWREX_H_ #define _TEAWREX_H_ @@ -31,8 +35,6 @@ namespace REX::teaw { using amplitude = std::function>(std::vector&, std::vector&)>; - //using ampCall = std::map; - //using ampPair = std::pair; using vecMap = std::map>, REX::eventComp>; struct rwgtVal : REX::paramVal{ @@ -158,18 +160,14 @@ namespace REX::teaw struct rwgtRunner : rwgtFiles{ public: void setMeEval( amplitude eval ); - //void setMeEvals( ampCall evals ); void addMeEval( const REX::event& ev, const amplitude& eval ); rwgtRunner(); rwgtRunner( rwgtFiles& rwgts ); rwgtRunner( rwgtFiles& rwgts, amplitude meCalc ); - //rwgtRunner( rwgtFiles& rwgts, ampCall& meCalcs ); rwgtRunner( rwgtFiles& rwgts, std::vector& meCalcs ); rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, amplitude meCalc ); rwgtRunner(const rwgtRunner& rwgts); - //rwgtRunner( std::string_view lhe_card, std::string_view slha_card, std::string_view reweight_card, - //ampCall meCalcs ); bool oneME(); bool singAmp(); protected: From c48fc6c90e470a4f22d9ff37aaba131f1db0898b Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Tue, 24 Sep 2024 11:42:46 +0200 Subject: [PATCH 66/76] fixed indexing issue when modifying several parameters in the same SLHA block --- tools/REX/REX.cc | 12 ++++++++++-- tools/REX/teawREX.cc | 10 +++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/tools/REX/REX.cc b/tools/REX/REX.cc index 288f591f1b..942e009c6c 100644 --- a/tools/REX/REX.cc +++ b/tools/REX/REX.cc @@ -1712,8 +1712,16 @@ namespace REX } realLine = paramLine; auto vals = *blankSplitter( realLine ); - idStr = vals[0]; - valStr = vals[1]; + if( vals.size() < 2 ){ return; } + if( clStringComp(vals[0],std::string("set")) ){ + if( vals.size() < 4 ) + throw std::runtime_error("Error while parsing SLHA parameter line --- this appears to be a reweight command, but insufficient arguments were provided."); + idStr = vals[2]; + valStr = vals[3]; + } else { + idStr = vals[0]; + valStr 
= vals[1]; + } if( parseOnline ){ if( vals.size() > 2 ) { diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc index cef75c3de4..63cb475db3 100644 --- a/tools/REX/teawREX.cc +++ b/tools/REX/teawREX.cc @@ -122,21 +122,21 @@ namespace REX::teaw void rwgtProc::parse(){ std::vector blocks; std::vector>> params; - auto procLines = *REX::lineSplitter( procString ); - for( auto line : procLines ) + auto procLines = REX::lineSplitter( procString ); + for( auto line : *procLines ) { if( line.find_first_not_of(" \n\r\f\t\v") == '#' ){ continue; } auto strtPt = line.find("set"); if( strtPt == REX::npos ){ continue; } - auto words = *REX::blankSplitter( line.substr(strtPt) ); - auto currBlock = words[1]; + auto words = REX::blankSplitter( line.substr(strtPt) ); + auto currBlock = words->at(1); auto loc = std::find_if( blocks.begin(), blocks.end(), [&]( std::string_view block ){ return (block == currBlock); } ); if( loc == blocks.end() ){ blocks.push_back( currBlock ); params.push_back( std::make_shared>( std::vector({rwgtVal( line )} ) )); } else { - params[ std::distance( blocks.begin(), loc ) - 1 ]->push_back( rwgtVal( line ) ); + params[ std::distance( blocks.begin(), loc ) ]->push_back( rwgtVal( line ) ); } } rwgtParams.reserve(blocks.size()); From 23d504be0656f5fb55dd1f219f53d8875df79e26 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Fri, 27 Sep 2024 13:40:26 +0200 Subject: [PATCH 67/76] separated tREX output into a specific reweighting plugin mode --- .../PLUGIN/CUDACPP_SA_OUTPUT/__init__.py | 4 +- .../CUDACPP_SA_OUTPUT/model_handling.py | 126 +---------- .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 209 ------------------ tools/REX/teawREX.cc | 2 +- 4 files changed, 6 insertions(+), 335 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py index 9014cdebce..3df191e2e4 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py +++ 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py @@ -32,16 +32,18 @@ # allows the command "output myformat PATH" in madgraph. # MYCLASS should inherit from class madgraph.iolibs.export_v4.VirtualExporter import PLUGIN.CUDACPP_OUTPUT.output as output + import PLUGIN.CUDACPP_OUTPUT.trex as trex new_output = { 'madevent_simd' : output.SIMD_ProcessExporter, 'madevent_gpu' : output.GPU_ProcessExporter, 'standalone_cudacpp' : output.PLUGIN_ProcessExporter, - 'standalone_rwgtcpp' : output.RWGT_ProcessExporter, + 'standalone_trex' : trex.TREX_ProcessExporter, # the following one are used for the second exporter class # (not really needed so far but interesting if need # specialization in the futur) 'standalone_simd' : output.SIMD_ProcessExporter, 'standalone_cuda' : output.GPU_ProcessExporter, } + new_reweight = {'trex': trex.TREX_ReweightInterface} # 2. Define new way to handle the cluster. # Example: new_cluster = {'mycluster': MYCLUSTERCLASS} diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 27acc6491b..db115b2441 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -1170,8 +1170,8 @@ def get_process_class_definitions(self, write=True): replace_dict['noutcoming'] = nexternal - nincoming replace_dict['nbhel'] = self.matrix_elements[0].get_helicity_combinations() # number of helicity combinations replace_dict['ndiagrams'] = len(self.matrix_elements[0].get('diagrams')) # AV FIXME #910: elsewhere matrix_element.get('diagrams') and max(config[0]... - if( write ): - file = self.read_template_file(self.process_class_template) % replace_dict # HACK! ignore write=False case + if( write ): # ZW: added dict return for uses in child exporters. 
Default argument is True so no need to modify other calls to this function + file = self.read_template_file(self.process_class_template) % replace_dict file = '\n'.join( file.split('\n')[8:] ) # skip first 8 lines in process_class.inc (copyright) return file else: @@ -2174,125 +2174,3 @@ def generate_helas_call(self, argument): self.add_amplitude(argument.get_call_key(), call_function) #------------------------------------------------------------------------------------ -class PLUGIN_OneProcessExporterRwgt(PLUGIN_OneProcessExporter): - """A custom OneProcessExporter for the REX reweighting""" - - rwgt_template = 'gpu/rwgt_runner.inc' - - # ZW - rwgt functions - def get_rwgt_legs(self, process): - """Return string with particle ids and status in the REX std::pair format""" - return ",".join(["{\"%i\",\"%i\"}" % (leg.get('state'), leg.get('id')) \ - for leg in process.get('legs')]).replace('0', '-1') - - def get_rwgt_legs_vec(self, processes): - """Return string with vectors of particle ids and statuses""" - prtSets = [] - for k in range(len(processes)): - prtSets.append("{" + self.get_rwgt_legs(processes[k]) + "}") - return ",".join(prtSets) - - def get_init_prts_vec(self, process): - """Return string with initial state particle ids for use in REX event sorting""" - prts = ",".join(["\"%i\"" % leg.get('id') for leg in process.get('legs') if leg.get('state') == 0]) - return "{" + prts + "}" - - def get_init_prts_vecs(self, processes): - """Return string with vectors of initial state particle ids""" - prtSets = [] - for k in range(len(processes)): - prtSets.append(self.get_init_prts_vec(processes[k])) - return ",".join(prtSets) - - def get_fin_prts_vec(self, process): - """Return string with final state particle ids for use in REX event sorting""" - prts = ",".join(["\"%i\"" % leg.get('id') for leg in process.get('legs') if leg.get('state') == 1]) - return "{" + prts + "}" - - def get_fin_prts_vecs(self, processes): - """Return string with vectors of final state 
particle ids""" - prtSets = [] - for k in range(len(processes)): - prtSets.append(self.get_fin_prts_vec(processes[k])) - return ",".join(prtSets) - - def get_rwgt_procMap(self, process): - """Return string with particle states and order in the REX procMap format""" - currState = False - retString = "thisProc{{\"-1\",{" - for leg in process.get('legs'): - if currState == leg.get('state'): - retString += "\"%i\"," % leg.get('id') - else: - currState = leg.get('state') - retString += "}},{\"1\",{\"%i,\"" % leg.get('id') - retString = retString[:-1] + "}}}" - return retString - - def get_proc_dir(self): - """Return process directory name for the current process""" - return "P%d_%s" % (self.process_number, self.process_name) - - def get_rwgt_runner(self): - """Return string to initialise the rwgtRunners in teawREX""" - return "%s::runner" % (self.get_proc_dir()) - - def get_rwgt_includes(self): - """Return string with the include directives for the REX reweighting""" - return "#include \"P%d_%s/rwgt_runner.cc\"" % (self.process_number, self.process_name) - - def write_rwgt_header(self): - """Writes a simple rwgt_runner.h file to forward declare the runner object""" - # Adjust the placeholders for use with `.format()` - rwgt_h = """#ifndef {namespace}_RWGT_RUNNER_H - #define {namespace}_RWGT_RUNNER_H - #include \"rwgt_instance.h\" - namespace {namespace} {{ - extern rwgt::instance runner; - }} - #endif""".format(namespace=self.get_proc_dir()) - - # Using `with` statement for better file handling - with open(os.path.join(self.path, 'rwgt_runner.h'), 'w') as ff: - ff.write(rwgt_h) - - def edit_rwgt_header(self): - """Adds process-specific details to the rwgt_runner.h template""" - replace_dict = super().get_process_class_definitions(write=False) - replace_dict['process_namespace'] = self.get_proc_dir() - replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() - template = open(pjoin(self.template_path,'REX', 'rwgt_runner.h'),'r').read() - ff = 
open(pjoin(self.path, 'rwgt_runner.h'),'w') - ff.write(template % replace_dict) - ff.close() - - def edit_rwgt_runner(self): - """Create the rwgt_runner.cc file for the REX reweighting""" - ###misc.sprint('Entering PLUGIN_OneProcessExporterRwgt.edit_rwgt_runner') - # Create the rwgt_runner.cc file -# replace_dict = {} - replace_dict = super().get_process_class_definitions(write=False) -# rwgt_runner = self.get_proc_dir() + self.rwgt_template - replace_dict['process_namespace'] = self.get_proc_dir() - replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() - replace_dict['init_prt_ids'] = self.get_init_prts_vecs(self.matrix_elements[0].get('processes')) - replace_dict['fin_prt_ids'] = self.get_fin_prts_vecs(self.matrix_elements[0].get('processes')) - replace_dict['process_events'] = self.get_rwgt_legs_vec(self.matrix_elements[0].get('processes')) - replace_dict['no_events'] = len(self.matrix_elements[0].get('processes')) - template = open(pjoin(self.template_path,'REX', 'rwgt_runner.inc'),'r').read() - ff = open(pjoin(self.path, 'rwgt_runner.cc'),'w') - ff.write(template % replace_dict) - ff.close() - - # ZW - override the PLUGIN method to generate the rwgt_runner.cc file as well - # note: also generating standard check_sa.cc and gcheck_sa.cu files, which - # are not used in the REX reweighting - def generate_process_files(self): - """Generate mgOnGpuConfig.h, CPPProcess.cc, CPPProcess.h, check_sa.cc, gXXX.cu links""" - # misc.sprint('Entering RWGT_OneProcessExporter.generate_process_files') - super().generate_process_files() - # misc.sprint('Generating rwgt_runner files') - self.edit_rwgt_header() - self.edit_rwgt_runner() - # misc.sprint('Finished generating rwgt files') - diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index 20b5846555..df4c514d29 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -412,212 +412,3 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): return out #------------------------------------------------------------------------------------ - -class RWGT_ProcessExporter(PLUGIN_ProcessExporter): - - oneprocessclass = model_handling.PLUGIN_OneProcessExporterRwgt - - rwgt_names = [] - proc_lines = [] - - s = PLUGINDIR + '/madgraph/iolibs/template_files/' - from_template = dict(PLUGIN_ProcessExporter.from_template) - from_template['src'] = from_template['src'] + [s+'REX/REX.cc', s+'REX/teawREX.cc', - s+'REX/REX.h', s+'REX/teawREX.h', - s+'REX/rwgt_instance.h', s+'REX/rwgt_instance.cc'] - from_template['SubProcesses'] = from_template['SubProcesses'] + [s+'gpu/cudacpp_driver.mk', - s+'REX/rwgt_instance.h', s+'REX/REX.h', s+'REX/teawREX.h'] - # from_template = {'.': [s+'.clang-format', s+'CMake/CMakeLists.txt', - # s+'COPYRIGHT', s+'COPYING', s+'COPYING.LESSER' ], - # 'CMake': [s+'CMake/Compilers.txt', s+'CMake/Platforms.txt', s+'CMake/Macros.txt'], - # 'src': [s+'gpu/rambo.h', s+'read_slha.h', s+'read_slha.cc', - # s+'gpu/mgOnGpuFptypes.h', s+'gpu/mgOnGpuCxtypes.h', s+'gpu/mgOnGpuVectors.h', - # s+'gpu/constexpr_math.h', - # s+'gpu/cudacpp_config.mk', - # s+'CMake/src/CMakeLists.txt', - # s+'REX/REX.cc', s+'REX/teawREX.cc', - # s+'REX/REX.h', s+'REX/teawREX.h', - # s+'REX/rwgt_instance.h', s+'REX/rwgt_instance.cc' ], - # 'SubProcesses': [s+'gpu/nvtx.h', s+'gpu/timer.h', s+'gpu/timermap.h', - # s+'gpu/ompnumthreads.h', s+'gpu/GpuRuntime.h', s+'gpu/GpuAbstraction.h', - # s+'gpu/MemoryAccessHelpers.h', s+'gpu/MemoryAccessVectors.h', - # s+'gpu/MemoryAccessMatrixElements.h', s+'gpu/MemoryAccessMomenta.h', - # s+'gpu/MemoryAccessRandomNumbers.h', s+'gpu/MemoryAccessWeights.h', - # s+'gpu/MemoryAccessAmplitudes.h', s+'gpu/MemoryAccessWavefunctions.h', - # s+'gpu/MemoryAccessGs.h', s+'gpu/MemoryAccessCouplingsFixed.h', - # s+'gpu/MemoryAccessNumerators.h', 
s+'gpu/MemoryAccessDenominators.h', - # s+'gpu/EventStatistics.h', s+'gpu/CommonRandomNumbers.h', - # s+'gpu/CrossSectionKernels.cc', s+'gpu/CrossSectionKernels.h', - # s+'gpu/MatrixElementKernels.cc', s+'gpu/MatrixElementKernels.h', - # s+'gpu/RamboSamplingKernels.cc', s+'gpu/RamboSamplingKernels.h', - # s+'gpu/RandomNumberKernels.h', s+'gpu/CommonRandomNumberKernel.cc', - # s+'gpu/CurandRandomNumberKernel.cc', s+'gpu/HiprandRandomNumberKernel.cc', - # s+'gpu/Bridge.h', s+'gpu/BridgeKernels.cc', s+'gpu/BridgeKernels.h', - # s+'gpu/fbridge.cc', s+'gpu/fbridge.inc', s+'gpu/fsampler.cc', s+'gpu/fsampler.inc', - # s+'gpu/MadgraphTest.h', s+'gpu/runTest.cc', - # s+'gpu/testmisc.cc', s+'gpu/testxxx_cc_ref.txt', - # s+'gpu/perf.py', s+'gpu/profile.sh', - # s+'CMake/SubProcesses/CMakeLists.txt', - # s+'gpu/cudacpp_driver.mk', - # s+'REX/rwgt_instance.h', s+'REX/REX.h', s+'REX/teawREX.h'], - # 'test': [s+'gpu/cudacpp_test.mk']} - - to_link_in_P = PLUGIN_ProcessExporter.to_link_in_P + ['rwgt_instance.h', 'REX.h', 'teawREX.h'] - - # to_link_in_P = ['nvtx.h', 'timer.h', 'timermap.h', - # 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', - # 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', - # 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', - # 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', - # 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', - # 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', - # 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', - # 'EventStatistics.h', 'CommonRandomNumbers.h', - # 'CrossSectionKernels.cc', 'CrossSectionKernels.h', - # 'MatrixElementKernels.cc', 'MatrixElementKernels.h', - # 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', - # 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', - # 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', - # 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', - # 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', - # 'MadgraphTest.h', 
'runTest.cc', - # 'testmisc.cc', 'testxxx_cc_ref.txt', - # 'cudacpp.mk', # this is generated from a template in Subprocesses but we still link it in P1 - # 'testxxx.cc', # this is generated from a template in Subprocesses but we still link it in P1 - # 'MemoryBuffers.h', # this is generated from a template in Subprocesses but we still link it in P1 - # 'MemoryAccessCouplings.h', # this is generated from a template in Subprocesses but we still link it in P1 - # 'perf.py', 'profile.sh', - # 'rwgt_instance.h', 'REX.h', 'teawREX.h'] - -# s = PLUGINDIR + '/madgraph/iolibs/template_files/' -# from_template = {'.': [s+'.clang-format', s+'CMake/CMakeLists.txt', -# s+'COPYRIGHT', s+'COPYING', s+'COPYING.LESSER' ], -# 'CMake': [s+'CMake/Compilers.txt', s+'CMake/Platforms.txt', s+'CMake/Macros.txt'], -# 'src': [s+'gpu/rambo.h', s+'read_slha.h', s+'read_slha.cc', -# s+'gpu/mgOnGpuFptypes.h', s+'gpu/mgOnGpuCxtypes.h', s+'gpu/mgOnGpuVectors.h', -# s+'CMake/src/CMakeLists.txt', -# s+'REX/REX.cc', s+'REX/teawREX.cc', -# s+'REX/REX.h', s+'REX/teawREX.h', -# s+'REX/rwgt_instance.h', s+'REX/rwgt_instance.cc'], -# 'SubProcesses': [s+'gpu/nvtx.h', s+'gpu/timer.h', s+'gpu/timermap.h', -# s+'gpu/ompnumthreads.h', s+'gpu/GpuRuntime.h', s+'gpu/GpuAbstraction.h', -# s+'gpu/MemoryAccessHelpers.h', s+'gpu/MemoryAccessVectors.h', -# s+'gpu/MemoryAccessMatrixElements.h', s+'gpu/MemoryAccessMomenta.h', -# s+'gpu/MemoryAccessRandomNumbers.h', s+'gpu/MemoryAccessWeights.h', -# s+'gpu/MemoryAccessAmplitudes.h', s+'gpu/MemoryAccessWavefunctions.h', -# s+'gpu/MemoryAccessGs.h', s+'gpu/MemoryAccessCouplingsFixed.h', -# s+'gpu/MemoryAccessNumerators.h', s+'gpu/MemoryAccessDenominators.h', -# s+'gpu/EventStatistics.h', s+'gpu/CommonRandomNumbers.h', -# s+'gpu/CrossSectionKernels.cc', s+'gpu/CrossSectionKernels.h', -# s+'gpu/MatrixElementKernels.cc', s+'gpu/MatrixElementKernels.h', -# s+'gpu/RamboSamplingKernels.cc', s+'gpu/RamboSamplingKernels.h', -# s+'gpu/RandomNumberKernels.h', 
s+'gpu/CommonRandomNumberKernel.cc', -# s+'gpu/CurandRandomNumberKernel.cc', s+'gpu/HiprandRandomNumberKernel.cc', -# s+'gpu/Bridge.h', s+'gpu/BridgeKernels.cc', s+'gpu/BridgeKernels.h', -# s+'gpu/fbridge.cc', s+'gpu/fbridge.inc', s+'gpu/fsampler.cc', s+'gpu/fsampler.inc', -# s+'gpu/MadgraphTest.h', s+'gpu/runTest.cc', -# s+'gpu/testmisc.cc', s+'gpu/testxxx_cc_ref.txt', -# s+'gpu/perf.py', s+'gpu/profile.sh', -# s+'CMake/SubProcesses/CMakeLists.txt', -# s+'gpu/cudacpp_rex_driver.mk', -# s+'REX/rwgt_instance.h', s+'REX/REX.h', s+'REX/teawREX.h'], -# 'test': [s+'gpu/cudacpp_test.mk']} - -# # from_template['SubProcesses'].append(s+'REX/rwgt_instance.h') -# # from_template['SubProcesses'].append(s+'REX/REX.hpp') -# # from_template['SubProcesses'].append(s+'REX/teawREX.hpp') -# # from_template['SubProcesses'].append(s+'gpu/cudacpp_rex_driver.mk') - -# to_link_in_P = ['nvtx.h', 'timer.h', 'timermap.h', -# 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', -# 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', -# 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', -# 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', -# 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', -# 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', -# 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', -# 'EventStatistics.h', 'CommonRandomNumbers.h', -# 'CrossSectionKernels.cc', 'CrossSectionKernels.h', -# 'MatrixElementKernels.cc', 'MatrixElementKernels.h', -# 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', -# 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', -# 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', -# 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', -# 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', -# 'MadgraphTest.h', 'runTest.cc', -# 'testmisc.cc', 'testxxx_cc_ref.txt', -# 'cudacpp.mk', # this is generated from a template in Subprocesses but we still link it in P1 -# 'testxxx.cc', # this is generated from a template 
in Subprocesses but we still link it in P1 -# 'MemoryBuffers.h', # this is generated from a template in Subprocesses but we still link it in P1 -# 'MemoryAccessCouplings.h', # this is generated from a template in Subprocesses but we still link it in P1 -# 'perf.py', 'profile.sh', -# 'rwgt_instance.h', 'REX.h', 'teawREX.h'] - -# to_link_in_P.append('rwgt_instance.h') -# to_link_in_P.append('REX.hpp') -# to_link_in_P.append('teawREX.hpp') - - template_src_make = pjoin(PLUGINDIR, 'madgraph' ,'iolibs', 'template_files','gpu','cudacpp_rex_src.mk') - template_tst_make = pjoin(PLUGINDIR, 'madgraph', 'iolibs', 'template_files','gpu','cudacpp_test.mk') - template_Sub_make = pjoin(PLUGINDIR, 'madgraph', 'iolibs', 'template_files','gpu','cudacpp_runner.mk') - - # def generate_subprocess_directory(self, subproc_group, fortran_model, me=None): - # misc.sprint('Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory)') - # misc.sprint(' type(subproc_group)=%s'%type(subproc_group)) # e.g. madgraph.core.helas_objects.HelasMatrixElement - # misc.sprint(' type(fortran_model)=%s'%type(fortran_model)) # e.g. madgraph.iolibs.helas_call_writers.GPUFOHelasCallWriter - # misc.sprint(' type(me)=%s me=%s'%(type(me) if me is not None else None, me)) # e.g. 
int - # return super().generate_subprocess_directory(subproc_group, fortran_model, me) - - def generate_subprocess_directory(self, matrix_element, cpp_helas_call_writer, - proc_number=None): - """Generate the Pxxxxx directory for a subprocess in C++ standalone, - including the necessary .h and .cc files""" - - - process_exporter_cpp = self.oneprocessclass(matrix_element,cpp_helas_call_writer) - - self.rwgt_names.append("P%d_%s" % (process_exporter_cpp.process_number, - process_exporter_cpp.process_name)) - - process_lines = "\n".join([process_exporter_cpp.get_process_info_lines(me) for me in \ - process_exporter_cpp.matrix_elements]) - self.proc_lines.append(process_lines) - - # Create the directory PN_xx_xxxxx in the specified path - dirpath = pjoin(self.dir_path, 'SubProcesses', "P%d_%s" % (process_exporter_cpp.process_number, - process_exporter_cpp.process_name)) - try: - os.mkdir(dirpath) - except os.error as error: - logger.warning(error.strerror + " " + dirpath) - - with misc.chdir(dirpath): - logger.info('Creating files in directory %s' % dirpath) - process_exporter_cpp.path = dirpath - # Create the process .h and .cc files - process_exporter_cpp.generate_process_files() - for file in self.to_link_in_P: - files.ln('../%s' % file) - return - - def export_driver(self): - # misc.sprint("In export_driver") - # misc.sprint("Current working directory is: %s" % self.dir_path) - replace_dict = {} - replace_dict['info_lines'] = PLUGIN_export_cpp.get_mg5_info_lines() - replace_dict['multiprocess_lines'] = "\n".join(self.proc_lines) - replace_dict['include_lines'] = '' - replace_dict['run_set'] = '' - replace_dict['fbridge_vec'] = '' - for name in self.rwgt_names: - replace_dict['include_lines'] += '#include "%s/rwgt_runner.h"\n' % name - replace_dict['run_set'] += '%s::getEventSet(),' % name - replace_dict['fbridge_vec'] += '%s::bridgeConstr(),' % name - replace_dict['run_set'] = replace_dict['run_set'][:-1] - replace_dict['fbridge_vec'] = 
replace_dict['fbridge_vec'][:-1] - template_path = os.path.join( PLUGINDIR, 'madgraph', 'iolibs', 'template_files' ) - template = open(pjoin(template_path,'REX', 'rwgt_driver.inc'),'r').read() - ff = open(pjoin(self.dir_path, 'SubProcesses', 'rwgt_driver.cc'),'w') - ff.write(template % replace_dict) - ff.close() - diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc index 63cb475db3..99f6d5c59a 100644 --- a/tools/REX/teawREX.cc +++ b/tools/REX/teawREX.cc @@ -199,7 +199,7 @@ namespace REX::teaw auto namePos = locOpts.find( "rwgt_name" ); if( namePos != REX::npos ){ auto endName = locOpts.find_first_of( " \n\r\f\t\v", namePos ); - rwgtNames->push_back( std::string( locOpts.substr( namePos + 9, endName - namePos - 9 ) ) ); + rwgtNames->push_back( std::string( locOpts.substr( namePos + 10, endName - namePos - 10 ) ) ); } else { rwgtNames->push_back( "rwgt_" + std::to_string( k + 1 ) ); } From 36b713960728aab15d6de86a2c89f4b9875d706f Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Fri, 27 Sep 2024 13:41:06 +0200 Subject: [PATCH 68/76] added file storing all the functionality for tREX output --- .../CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py | 679 ++++++++++++++++++ 1 file changed, 679 insertions(+) create mode 100644 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py new file mode 100644 index 0000000000..a0585ec125 --- /dev/null +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py @@ -0,0 +1,679 @@ +# Copyright (C) 2023-2024 CERN. +# Licensed under the GNU Lesser General Public License (version 3 or later). +# Created by: Z. Wettersten (Sep 2024) for the MG5aMC CUDACPP plugin. 
+ +import os +import subprocess +import re +import sys +import importlib.util +SPEC_EXPORTCPP = importlib.util.find_spec('madgraph.iolibs.export_cpp') +PLUGIN_export_cpp = importlib.util.module_from_spec(SPEC_EXPORTCPP) +SPEC_EXPORTCPP.loader.exec_module(PLUGIN_export_cpp) +sys.modules['PLUGIN.CUDACPP_OUTPUT.PLUGIN_export_cpp'] = PLUGIN_export_cpp # allow 'import PLUGIN.CUDACPP_OUTPUT.PLUGIN_export_cpp' in model_handling.py +del SPEC_EXPORTCPP +###print('id(export_cpp)=%s'%id(export_cpp)) +###print('id(PLUGIN_export_cpp)=%s'%id(PLUGIN_export_cpp)) + +# AV - use template files from PLUGINDIR instead of MG5DIR +###from madgraph import MG5DIR +PLUGINDIR = os.path.dirname( __file__ ) + +# AV - model_handling includes the custom FileWriter, ALOHAWriter, UFOModelConverter, OneProcessExporter and HelasCallWriter, plus additional patches +import PLUGIN.CUDACPP_OUTPUT.model_handling as model_handling +import PLUGIN.CUDACPP_OUTPUT.output as output + +# AV - create a plugin-specific logger +import logging +logger = logging.getLogger('madgraph.PLUGIN.CUDACPP_OUTPUT.output') +from madgraph import MG5DIR +#------------------------------------------------------------------------------------ + +from os.path import join as pjoin +import madgraph +import madgraph.iolibs.files as files +import madgraph.iolibs.export_v4 as export_v4 +import madgraph.various.misc as misc +import madgraph.interface.reweight_interface as rwgt_interface +import madgraph.various.banner as banner +import models.check_param_card as check_param_card +import madgraph.interface.extended_cmd as extended_cmd +import madgraph.interface.common_run_interface as common_run_interface + +from . 
import launch_plugin + +class TREX_OneProcessExporter(model_handling.PLUGIN_OneProcessExporter): + """A custom OneProcessExporter for the TREX reweighting""" + + rwgt_template = 'gpu/rwgt_runner.inc' + + # ZW - rwgt functions + def get_rwgt_legs(self, process): + """Return string with particle ids and status in the REX std::pair format""" + return ",".join(["{\"%i\",\"%i\"}" % (leg.get('state'), leg.get('id')) \ + for leg in process.get('legs')]).replace('0', '-1') + + def get_rwgt_legs_vec(self, processes): + """Return string with vectors of particle ids and statuses""" + prtSets = [] + for k in range(len(processes)): + prtSets.append("{" + self.get_rwgt_legs(processes[k]) + "}") + return ",".join(prtSets) + + def get_init_prts_vec(self, process): + """Return string with initial state particle ids for use in REX event sorting""" + prts = ",".join(["\"%i\"" % leg.get('id') for leg in process.get('legs') if leg.get('state') == 0]) + return "{" + prts + "}" + + def get_init_prts_vecs(self, processes): + """Return string with vectors of initial state particle ids""" + prtSets = [] + for k in range(len(processes)): + prtSets.append(self.get_init_prts_vec(processes[k])) + return ",".join(prtSets) + + def get_fin_prts_vec(self, process): + """Return string with final state particle ids for use in REX event sorting""" + prts = ",".join(["\"%i\"" % leg.get('id') for leg in process.get('legs') if leg.get('state') == 1]) + return "{" + prts + "}" + + def get_fin_prts_vecs(self, processes): + """Return string with vectors of final state particle ids""" + prtSets = [] + for k in range(len(processes)): + prtSets.append(self.get_fin_prts_vec(processes[k])) + return ",".join(prtSets) + + def get_rwgt_procMap(self, process): + """Return string with particle states and order in the REX procMap format""" + currState = False + retString = "thisProc{{\"-1\",{" + for leg in process.get('legs'): + if currState == leg.get('state'): + retString += "\"%i\"," % leg.get('id') + else: + 
currState = leg.get('state') + retString += "}},{\"1\",{\"%i,\"" % leg.get('id') + retString = retString[:-1] + "}}}" + return retString + + def get_proc_dir(self): + """Return process directory name for the current process""" + return "P%d_%s" % (self.process_number, self.process_name) + + def get_rwgt_runner(self): + """Return string to initialise the rwgtRunners in teawREX""" + return "%s::runner" % (self.get_proc_dir()) + + def get_rwgt_includes(self): + """Return string with the include directives for the REX reweighting""" + return "#include \"P%d_%s/rwgt_runner.cc\"" % (self.process_number, self.process_name) + + def write_rwgt_header(self): + """Writes a simple rwgt_runner.h file to forward declare the runner object""" + # Adjust the placeholders for use with `.format()` + rwgt_h = """#ifndef {namespace}_RWGT_RUNNER_H + #define {namespace}_RWGT_RUNNER_H + #include \"rwgt_instance.h\" + namespace {namespace} {{ + extern rwgt::instance runner; + }} + #endif""".format(namespace=self.get_proc_dir()) + + # Using `with` statement for better file handling + with open(os.path.join(self.path, 'rwgt_runner.h'), 'w') as ff: + ff.write(rwgt_h) + + def edit_rwgt_header(self): + """Adds process-specific details to the rwgt_runner.h template""" + replace_dict = super().get_process_class_definitions(write=False) + replace_dict['process_namespace'] = self.get_proc_dir() + replace_dict['info_lines'] = model_handling.PLUGIN_export_cpp.get_mg5_info_lines() + template = open(pjoin(self.template_path,'REX', 'rwgt_runner.h'),'r').read() + ff = open(pjoin(self.path, 'rwgt_runner.h'),'w') + ff.write(template % replace_dict) + ff.close() + + def edit_rwgt_runner(self): + """Create the rwgt_runner.cc file for the REX reweighting""" + ###misc.sprint('Entering PLUGIN_OneProcessExporterRwgt.edit_rwgt_runner') + # Create the rwgt_runner.cc file +# replace_dict = {} + replace_dict = super().get_process_class_definitions(write=False) +# rwgt_runner = self.get_proc_dir() + 
self.rwgt_template + replace_dict['process_namespace'] = self.get_proc_dir() + replace_dict['info_lines'] = model_handling.PLUGIN_export_cpp.get_mg5_info_lines() + replace_dict['init_prt_ids'] = self.get_init_prts_vecs(self.matrix_elements[0].get('processes')) + replace_dict['fin_prt_ids'] = self.get_fin_prts_vecs(self.matrix_elements[0].get('processes')) + replace_dict['process_events'] = self.get_rwgt_legs_vec(self.matrix_elements[0].get('processes')) + replace_dict['no_events'] = len(self.matrix_elements[0].get('processes')) + template = open(pjoin(self.template_path,'REX', 'rwgt_runner.inc'),'r').read() + ff = open(pjoin(self.path, 'rwgt_runner.cc'),'w') + ff.write(template % replace_dict) + ff.close() + + # ZW - override the PLUGIN method to generate the rwgt_runner.cc file as well + # note: also generating standard check_sa.cc and gcheck_sa.cu files, which + # are not used in the REX reweighting + def generate_process_files(self): + """Generate mgOnGpuConfig.h, CPPProcess.cc, CPPProcess.h, check_sa.cc, gXXX.cu links""" + # misc.sprint('Entering RWGT_OneProcessExporter.generate_process_files') + super().generate_process_files() + # misc.sprint('Generating rwgt_runner files') + self.edit_rwgt_header() + self.edit_rwgt_runner() + # misc.sprint('Finished generating rwgt files') + +class TREX_ProcessExporter(output.PLUGIN_ProcessExporter): + + oneprocessclass = TREX_OneProcessExporter + + rwgt_names = [] + proc_lines = [] + + s = PLUGINDIR + '/madgraph/iolibs/template_files/' + from_template = dict(output.PLUGIN_ProcessExporter.from_template) + from_template['src'] = from_template['src'] + [s+'REX/REX.cc', s+'REX/teawREX.cc', + s+'REX/REX.h', s+'REX/teawREX.h', + s+'REX/rwgt_instance.h', s+'REX/rwgt_instance.cc'] + from_template['SubProcesses'] = from_template['SubProcesses'] + [s+'gpu/cudacpp_driver.mk', + s+'REX/rwgt_instance.h', s+'REX/REX.h', s+'REX/teawREX.h'] + + to_link_in_P = output.PLUGIN_ProcessExporter.to_link_in_P + ['rwgt_instance.h', 'REX.h', 
'teawREX.h'] + + template_src_make = pjoin(PLUGINDIR, 'madgraph' ,'iolibs', 'template_files','gpu','cudacpp_rex_src.mk') + template_tst_make = pjoin(PLUGINDIR, 'madgraph', 'iolibs', 'template_files','gpu','cudacpp_test.mk') + template_Sub_make = pjoin(PLUGINDIR, 'madgraph', 'iolibs', 'template_files','gpu','cudacpp_runner.mk') + + def generate_subprocess_directory(self, matrix_element, cpp_helas_call_writer, + proc_number=None): + """Generate the Pxxxxx directory for a subprocess in C++ standalone, + including the necessary .h and .cc files""" + + + process_exporter_cpp = self.oneprocessclass(matrix_element,cpp_helas_call_writer) + + self.rwgt_names.append("P%d_%s" % (process_exporter_cpp.process_number, + process_exporter_cpp.process_name)) + + process_lines = "\n".join([process_exporter_cpp.get_process_info_lines(me) for me in \ + process_exporter_cpp.matrix_elements]) + self.proc_lines.append(process_lines) + + # Create the directory PN_xx_xxxxx in the specified path + dirpath = pjoin(self.dir_path, 'SubProcesses', "P%d_%s" % (process_exporter_cpp.process_number, + process_exporter_cpp.process_name)) + try: + os.mkdir(dirpath) + except os.error as error: + logger.warning(error.strerror + " " + dirpath) + + with misc.chdir(dirpath): + logger.info('Creating files in directory %s' % dirpath) + process_exporter_cpp.path = dirpath + # Create the process .h and .cc files + process_exporter_cpp.generate_process_files() + for file in self.to_link_in_P: + files.ln('../%s' % file) + return + + def export_driver(self): + # misc.sprint("In export_driver") + # misc.sprint("Current working directory is: %s" % self.dir_path) + replace_dict = {} + replace_dict['info_lines'] = model_handling.PLUGIN_export_cpp.get_mg5_info_lines() + replace_dict['multiprocess_lines'] = "\n".join(self.proc_lines) + replace_dict['include_lines'] = '' + replace_dict['run_set'] = '' + replace_dict['fbridge_vec'] = '' + for name in self.rwgt_names: + replace_dict['include_lines'] += '#include 
"%s/rwgt_runner.h"\n' % name + replace_dict['run_set'] += '%s::getEventSet(),' % name + replace_dict['fbridge_vec'] += '%s::bridgeConstr(),' % name + replace_dict['run_set'] = replace_dict['run_set'][:-1] + replace_dict['fbridge_vec'] = replace_dict['fbridge_vec'][:-1] + template_path = os.path.join( PLUGINDIR, 'madgraph', 'iolibs', 'template_files' ) + template = open(pjoin(template_path,'REX', 'rwgt_driver.inc'),'r').read() + ff = open(pjoin(self.dir_path, 'SubProcesses', 'rwgt_driver.cc'),'w') + ff.write(template % replace_dict) + ff.close() + + def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): + self.export_driver() + return super().finalize(matrix_element, cmdhistory, MG5options, outputflag) + +class TREX_ReweightInterface(rwgt_interface.ReweightInterface): + """A custom ReweightInterface for the TREX reweighting""" + + sa_class = 'standalone_trex' + + def __init__(self, *args, **kwargs): + """Initialise the TREX reweighting interface + Currently no (substantial) changes compared to upstream are necessary, + but adding an __init__ method allows for future modifications""" + super().__init__(*args, **kwargs) + self.debug_output = 'tREX_debug' + self.param_card = None + self.reweight_card = [] + self.reweight_names = [] + + def setup_f2py_interface(self): + """"Override native setup_f2py_interface to avoid parsing things not necessary for TREX reweighting""" + + self.create_standalone_directory() + self.compile() + + def launch_actual_reweighting(self, *args, **kwargs): + """override standard launch command to instead call the TREX reweighting""" + + import csv + + if self.rwgt_dir: + path_me =self.rwgt_dir + else: + path_me = self.me_dir + + if self.second_model or self.second_process or self.dedicated_path: + rw_dir = pjoin(path_me, 'rw_me_%s' % self.nb_library) + else: + rw_dir = pjoin(path_me, 'rw_me') + + run_path = pjoin(rw_dir, 'SubProcesses') + input_file = os.path.relpath(self.lhe_input.path, run_path) + output_file = input_file + 
'rw' + output_path = self.lhe_input.path + 'rw' + param_card = pjoin(rw_dir, 'Cards', 'param_card.dat') + + #ZW: Exceptions, making sure all the necessary files for teawREX are accessible + if( misc.is_executable(pjoin(run_path,'rwgt_driver_gpu.exe')) ): + driver = pjoin(run_path, 'rwgt_driver_gpu.exe') + elif(misc.is_executable(pjoin(run_path,'rwgt_driver_cpp.exe')) ): + driver = pjoin(run_path,'rwgt_driver_cpp.exe') + else: + raise Exception('No teawREX driver found for parallel reweighting') + if not os.path.exists(param_card): + try: + files.cp(os.path.join(path_me, 'Cards', 'param_card_default.dat'), param_card) + except: + raise Exception("No param_card.dat file found in %s" % pjoin(path_me, 'Cards')) + param_path = os.path.relpath(param_card, run_path) + + rwgt_card = os.path.join(path_me, 'Cards', 'reweight_card.dat') + + self.write_reweight_card(rwgt_card) + + if not os.path.exists(rwgt_card): + try: + files.cp(os.path.join(path_me, 'Cards', 'reweight_card_default.dat'), rwgt_card) + except: + raise Exception("No reweight_card.dat file found in %s" % pjoin(path_me, 'Cards')) + rwgt_path = os.path.relpath(rwgt_card, run_path) + target = '' + if not self.mother: + name, ext = self.lhe_input.name.rsplit('.',1) + target = '%s_out.%s' % (name, ext) + elif self.output_type != "default" : + target = pjoin(self.mother.me_dir, 'Events', self.mother.run_name, 'events.lhe') + else: + target = self.lhe_input.path + + #ZW: rwgt_driver is written and compiled properly, now just to figure out how to run it through MG + subprocess.call([driver, '-lhe=%s' % input_file, '-slha=%s' % param_card, '-rwgt=%s' % rwgt_card, '-out=%s' % output_file], cwd=run_path) + + files.mv(output_path, target) + csv_file = pjoin(run_path, 'rwgt_results.csv') + with open(csv_file, newline='') as results: + iters = csv.reader(results) + for row in iters: + self.all_cross_section[(row[0],'')] = (float(row[1]), float(row[2])) + + return + + def compile(self): + """override compile to use the TREX 
makefiles""" + + if self.multicore=='wait': + return + + if not self.rwgt_dir: + path_me = self.me_dir + else: + path_me = self.rwgt_dir + + rwgt_dir_possibility = ['rw_me','rw_me_%s' % self.nb_library,'rw_mevirt','rw_mevirt_%s' % self.nb_library] + for onedir in rwgt_dir_possibility: + if not os.path.isdir(pjoin(path_me,onedir)): + continue + pdir = pjoin(path_me, onedir, 'SubProcesses') + if self.mother: + nb_core = self.mother.options['nb_core'] if self.mother.options['run_mode'] !=0 else 1 + else: + nb_core = 1 + files.cp(pjoin(pdir, 'cudacpp_driver.mk'),pjoin(pdir, 'makefile')) + misc.compile(cwd=pdir, nb_core=nb_core,mode='cpp') + return + + def load_module(self): + """override load_module since we do not use it""" + return + + # def import_command_file(self, filepath): + # """override import_command_file to simply launch TREX""" + # self.exec_cmd('launch', precmd=True) + # return + + def do_launch(self, line): + """override do_launch to instead overwrite the reweight_card + to fit the expected input for TREX without having to extend TREX itself""" + args = self.split_arg(line) + opts = self.check_launch(args) + mgcmd = self.mg5cmd + if opts['rwgt_name']: + self.options['rwgt_name'] = opts['rwgt_name'] + if opts['rwgt_info']: + self.options['rwgt_info'] = opts['rwgt_info'] + model_line = self.banner.get('proc_card', 'full_model_line') + + # TV: Load model: needed for the combine_ij function: maybe not needed everyt time??? + model = self.banner.get('proc_card', 'model') + self.load_model( model, True, False) + + if not self.has_standalone_dir: + out = self.setup_f2py_interface() + if out: + return + + if not self.param_card: + s_orig = self.banner['slha'] + self.param_card = check_param_card.ParamCard(s_orig.splitlines()) + + # get the mode of reweighting #LO/NLO/NLO_tree/... 
+ type_rwgt = self.get_weight_names() + + if self.rwgt_dir: + path_me =self.rwgt_dir + else: + path_me = self.me_dir + + + # get iterator over param_card and the name associated to the current reweighting. + param_card_iterator, tag_name = self.handle_param_card(model_line, args, type_rwgt) + + self.reweight_names.append(tag_name) + + # perform the scanning + if param_card_iterator: + if self.options['rwgt_name']: + reweight_name = self.options['rwgt_name'].rsplit('_',1)[0] # to avoid side effect during the scan + else: + reweight_name = None + for i,card in enumerate(param_card_iterator): + if reweight_name: + self.options['rwgt_name'] = '%s_%s' % (reweight_name, i+1) + self.new_param_card = card + #card.write(pjoin(rw_dir, 'Cards', 'param_card.dat')) + self.exec_cmd("launch --keep_card", printcmd=False, precmd=True) + + def check_multicore(self): + """override check_multicore to overloading the CPU (we never want to run TREX in multicore mode)""" + return False + + def handle_param_card(self, model_line, args, type_rwgt): + """override handle_param_card to get rid of all the unnecessary checks and file writing + now simply loads the param_card and uses get_diff to tranlate into internal format""" + + if self.rwgt_dir: + path_me =self.rwgt_dir + else: + path_me = self.me_dir + + if self.second_model or self.second_process or self.dedicated_path: + rw_dir = pjoin(path_me, 'rw_me_%s' % self.nb_library) + else: + rw_dir = pjoin(path_me, 'rw_me') + if not '--keep_card' in args: + if self.has_nlo and self.rwgt_mode != "LO": + rwdir_virt = rw_dir.replace('rw_me', 'rw_mevirt') + with open(pjoin(rw_dir, 'Cards', 'param_card.dat'), 'w') as fsock: + fsock.write(self.banner['slha']) + out, cmd = common_run_interface.CommonRunCmd.ask_edit_card_static(cards=['param_card.dat'], + ask=self.ask, pwd=rw_dir, first_cmd=self.stored_line, + write_file=False, return_instance=True + ) + self.stored_line = None + card = cmd.param_card + new_card = card.write() + elif 
self.new_param_card: + new_card = self.new_param_card.write() + else: + new_card = open(pjoin(rw_dir, 'Cards', 'param_card.dat')).read() + + # check for potential scan in the new card + pattern_scan = re.compile(r'''^(decay)?[\s\d]*scan''', re.I+re.M) + param_card_iterator = [] + if pattern_scan.search(new_card): + import madgraph.interface.extended_cmd as extended_cmd + try: + import internal.extended_cmd as extended_internal + Shell_internal = extended_internal.CmdShell + except: + Shell_internal = None + if not isinstance(self.mother, (extended_cmd.CmdShell, Shell_internal)): + raise Exception("scan are not allowed on the Web") + # at least one scan parameter found. create an iterator to go trough the cards + main_card = check_param_card.ParamCardIterator(new_card) + if self.options['rwgt_name']: + self.options['rwgt_name'] = '%s_0' % self.options['rwgt_name'] + + param_card_iterator = main_card + first_card = param_card_iterator.next(autostart=True) + new_card = first_card.write() + self.new_param_card = first_card + #first_card.write(pjoin(rw_dir, 'Cards', 'param_card.dat')) + + # check if "Auto" is present for a width parameter) + if 'block' not in new_card.lower(): + raise Exception(str(new_card)) + tmp_card = new_card.lower().split('block',1)[1] + if "auto" in tmp_card: + if param_card_iterator: + first_card.write(pjoin(rw_dir, 'Cards', 'param_card.dat')) + else: + ff = open(pjoin(rw_dir, 'Cards', 'param_card.dat'),'w') + ff.write(new_card) + ff.close() + + self.mother.check_param_card(pjoin(rw_dir, 'Cards', 'param_card.dat')) + new_card = open(pjoin(rw_dir, 'Cards', 'param_card.dat')).read() + + + # Find new tag in the banner and add information if needed + if 'initrwgt' in self.banner and self.output_type == 'default': + if 'name=\'mg_reweighting\'' in self.banner['initrwgt']: + blockpat = re.compile(r'''(?P.*?)''', re.I+re.M+re.S) + before, content, after = blockpat.split(self.banner['initrwgt']) + header_rwgt_other = before + after + pattern = 
re.compile('\\d+)|(?P[_\\w\\-\\.]+))(?P\\s*|_\\w+)\'>(?P.*?)', re.S+re.I+re.M) + mg_rwgt_info = pattern.findall(content) + maxid = 0 + for k,(i, fulltag, nlotype, diff) in enumerate(mg_rwgt_info): + if i: + if int(i) > maxid: + maxid = int(i) + mg_rwgt_info[k] = (i, nlotype, diff) # remove the pointless fulltag tag + else: + mg_rwgt_info[k] = (fulltag, nlotype, diff) # remove the pointless id tag + + maxid += 1 + rewgtid = maxid + if self.options['rwgt_name']: + #ensure that the entry is not already define if so overwrites it + for (i, nlotype, diff) in mg_rwgt_info[:]: + for flag in type_rwgt: + if 'rwgt_%s' % i == '%s%s' %(self.options['rwgt_name'],flag) or \ + i == '%s%s' % (self.options['rwgt_name'], flag): + logger.warning("tag %s%s already defines, will replace it", self.options['rwgt_name'],flag) + mg_rwgt_info.remove((i, nlotype, diff)) + + else: + header_rwgt_other = self.banner['initrwgt'] + mg_rwgt_info = [] + rewgtid = 1 + else: + self.banner['initrwgt'] = '' + header_rwgt_other = '' + mg_rwgt_info = [] + rewgtid = 1 + + # add the reweighting in the banner information: + #starts by computing the difference in the cards. 
+ #s_orig = self.banner['slha'] + #self.orig_param_card_text = s_orig + s_new = new_card + self.new_param_card = check_param_card.ParamCard(s_new.splitlines()) + + #define tag for the run + if self.options['rwgt_name']: + tag = self.options['rwgt_name'] + else: + tag = str(rewgtid) + + if 'rwgt_info' in self.options and self.options['rwgt_info']: + card_diff = self.options['rwgt_info'] + for name in type_rwgt: + mg_rwgt_info.append((tag, name, self.options['rwgt_info'])) + elif not self.second_model and not self.dedicated_path: + old_param = self.param_card + new_param = self.new_param_card + card_diff = old_param.create_diff(new_param) + if card_diff == '' and not self.second_process: + logger.warning(' REWEIGHTING: original card and new card are identical.') + try: + if old_param['sminputs'].get(3)- new_param['sminputs'].get(3) > 1e-3 * new_param['sminputs'].get(3): + logger.warning("We found different value of alpha_s. Note that the value of alpha_s used is the one associate with the event and not the one from the cards.") + except Exception as error: + logger.debug("error in check of alphas: %s" % str(error)) + pass #this is a security + if not self.second_process: + for name in type_rwgt: + mg_rwgt_info.append((tag, name, card_diff)) + else: + str_proc = "\n change process ".join([""]+self.second_process) + for name in type_rwgt: + mg_rwgt_info.append((tag, name, str_proc + '\n'+ card_diff)) + else: + if self.second_model: + str_info = "change model %s" % self.second_model + else: + str_info ='' + if self.second_process: + str_info += "\n change process ".join([""]+self.second_process) + if self.dedicated_path: + for k,v in self.dedicated_path.items(): + str_info += "\n change %s %s" % (k,v) + card_diff = str_info + str_info += '\n' + s_new + for name in type_rwgt: + mg_rwgt_info.append((tag, name, str_info)) + + # re-create the banner. 
+ self.banner['initrwgt'] = header_rwgt_other + if self.output_type == 'default': + self.banner['initrwgt'] += '\n\n' + else: + self.banner['initrwgt'] += '\n\n' + for tag, rwgttype, diff in mg_rwgt_info: + if self.inc_sudakov: + try: + sud_order = int(rwgttype[-1]) -1 + sud_order = '10' +rwgttype[-2:] + self.banner['initrwgt'] += '%sscale_%s_sud\n' % \ + (rwgttype, diff, sud_order) + except IndexError: + logger.critical('This is a reweighted event file! Do not reweight with ewsudakov twice') + sys.exit(1) + else: + if tag.isdigit(): + self.banner['initrwgt'] += '%s\n' % \ + (tag, rwgttype, diff) + else: + self.banner['initrwgt'] += '%s\n' % \ + (tag, rwgttype, diff) + self.banner['initrwgt'] += '\n\n' + self.banner['initrwgt'] = self.banner['initrwgt'].replace('\n\n', '\n') + + #logger.info('starts to compute weight for events with the following modification to the param_card:') + #logger.info(card_diff.replace('\n','\nKEEP:')) + try: + self.run_card = banner.Banner(self.banner).charge_card('run_card') + except Exception: + logger.debug('no run card found -- reweight interface') + self.run_card = None + + if self.options['rwgt_name']: + tag_name = self.options['rwgt_name'] + else: + tag_name = 'rwgt_%s' % rewgtid + + self.reweight_card.append(card_diff) + + return param_card_iterator, tag_name + + def write_reweight_card(self,rwgt_path): + """function for collecting all the reweight iterations from the parsed reweight card + and write it out with the explicit 'set BLOCK PARAM VALUE' format""" + if( len(self.reweight_names) != len(self.reweight_card) ): + raise Exception('Mismatch in number of reweight names and reweight cards') + + output_card = '' + + for i, card in enumerate(self.reweight_card): + output_card += 'launch --rwgt_name=%s\n' % self.reweight_names[i] + output_card += card + '\n' + + output_card = output_card.replace('param_card', '').replace(' ', ' ') + + with open(rwgt_path, 'w') as f: + f.write(output_card) + + return + + def do_quit(self, line): + 
if self.exitted: + return + + self.launch_actual_reweighting() + + self.exitted = True + + if 'init' in self.banner: + cross = 0 + error = 0 + for line in self.banner['init'].split('\n'): + split = line.split() + if len(split) == 4: + cross, error = float(split[0]), float(split[1]) + + if not self.multicore == 'create': + # No print of results for the multicore mode for the one printed on screen + if 'orig' not in self.all_cross_section: + logger.info('Original cross-section: %s +- %s pb' % (cross, error)) + else: + logger.info('Original cross-section: %s +- %s pb (cross-section from sum of weights: %s)' % (cross, error, self.all_cross_section['orig'][0])) + logger.info('Computed cross-section:') + keys = list(self.all_cross_section.keys()) + keys.sort(key=lambda x: str(x)) + for key in keys: + if key == 'orig': + continue + logger.info('%s : %s +- %s pb' % (key[0] if not key[1] else '%s%s' % key, + self.all_cross_section[key][0],self.all_cross_section[key][1] )) + self.terminate_fortran_executables() + + if self.rwgt_dir and self.multicore == False: + self.save_to_pickle() + + with misc.stdchannel_redirected(sys.stdout, os.devnull): + for run_id in self.calculator: + del self.calculator[run_id] + del self.calculator \ No newline at end of file From e442637b0964aee76c90c480c7252e9c81a1cfc4 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Fri, 27 Sep 2024 13:45:41 +0200 Subject: [PATCH 69/76] changed native mg branch from rexCPP to gpucpp --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 7ce3f44a13..6bb7e090be 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,7 @@ [submodule "MG5aMC/mg5amcnlo"] path = MG5aMC/mg5amcnlo url = https://github.com/zeniheisser/mg5amcnlo/ - branch = rexCPP + branch = gpucpp [submodule "MG5aMC/"] url = git@github.com:zeniheisser/mg5amcnlo.git [submodule "MG5aMC"] From eaa2a3bb674d0ccda2e2ba1ae0bf0761e73a2f38 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Mon, 30 
Sep 2024 13:26:43 +0200 Subject: [PATCH 70/76] updated submodule to point to latest gpucpp --- MG5aMC/mg5amcnlo | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo index d7f61db844..af3f0d903b 160000 --- a/MG5aMC/mg5amcnlo +++ b/MG5aMC/mg5amcnlo @@ -1 +1 @@ -Subproject commit d7f61db844bcd8c0741f777c3fdf0099c6ed1331 +Subproject commit af3f0d903b8cc2914a6b552b365fbed53ef01360 From deb07ae11ff2c7e3396b90ac2815211094ca0c72 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Wed, 2 Oct 2024 16:18:37 +0200 Subject: [PATCH 71/76] major restructuring of compilation, such that symbols shared across subprocesses are bound locally. minimal changes to underlying code structure --- added a new default path for the param_card and separated fbridge into header and implementation for external access across compilation units --- .../iolibs/template_files/gpu/Bridge.h | 6 +- .../template_files/gpu/cudacpp_driver.mk | 63 ++----------- .../template_files/gpu/cudacpp_runner.mk | 88 +++---------------- .../iolibs/template_files/gpu/fbridge.cc | 2 +- .../iolibs/template_files/gpu/fbridge.h | 52 +++++++++++ .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 4 +- .../CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py | 6 +- tools/REX/rwgt_runner.cc | 4 +- 8 files changed, 85 insertions(+), 140 deletions(-) create mode 100644 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h index bcc5552cfe..eba523e7dc 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h @@ -266,11 +266,8 @@ namespace mg5amcCpu // FIXME: the process instance can happily go out of scope because it is only 
needed to read parameters? // FIXME: the CPPProcess should really be a singleton? what if fbridgecreate is called from several Fortran threads? CPPProcess process( /*verbose=*/false ); -#ifndef _LIBCOMP_ std::string paramCard = "../../Cards/param_card.dat"; -#else - std::string paramCard = "../Cards/param_card.dat"; -#endif + std::string paramCardTrex = "../Cards/param_card.dat"; /* #ifdef __HIPCC__ if( !std::experimental::filesystem::exists( paramCard ) ) paramCard = "../" + paramCard; @@ -282,6 +279,7 @@ namespace mg5amcCpu //if( !( stat( paramCard.c_str(), &dummyBuffer ) == 0 ) ) paramCard = "../" + paramCard; // auto fileExists = []( std::string& fileName ) { struct stat buffer; return stat( fileName.c_str(), &buffer ) == 0; }; + if( fileExists( paramCardTrex ) ) paramCard = paramCardTrex; // ZW: override param_card.dat to be one dir down since trex runs from the SubProcesses dir directory if( !fileExists( paramCard ) ) paramCard = "../" + paramCard; // bypass std::filesystem #803 process.initProc( paramCard ); } diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk index 72360410f9..1b4de8226e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_driver.mk @@ -1,7 +1,7 @@ # Copyright (C) 2020-2024 CERN and UCLouvain. # Licensed under the GNU Lesser General Public License (version 3 or later). # Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. -# Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. +# Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. 
#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) #=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts @@ -257,7 +257,7 @@ else endif -# Export GPUCC, GPUFLAGS, GPULANGUAGE, GPUSUFFIX (so that there is no need to check/define them again in cudacpp_src.mk) +# Export GPUCC, GPUFLAGS, GPULANGUAGE, GPUSUFFIX (these are needed by both src and rwgt_runners, but should not be overwritten there) export CUDA_HOME export GPUCC export GPUFLAGS @@ -314,37 +314,6 @@ override CXXNAMESUFFIX = _$(CXXNAME) # Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) export CXXNAMESUFFIX -# Dependency on test directory -# Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) -# Outside the madgraph4gpu git repo: by default do not build the tests (optionally use an external or local gtest) -###GTEST_ROOT = /cvmfs/sft.cern.ch/lcg/releases/gtest/1.11.0-21e8c/x86_64-centos8-gcc11-opt/# example of an external gtest installation -###LOCALGTEST = yes# comment this out (or use make LOCALGTEST=yes) to build tests using a local gtest installation -TESTDIRCOMMON = ../../../../test -TESTDIRLOCAL = ../test -ifneq ($(wildcard $(GTEST_ROOT)),) - TESTDIR = -else ifneq ($(LOCALGTEST),) - TESTDIR=$(TESTDIRLOCAL) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else ifneq ($(wildcard ../../../../epochX/cudacpp/CODEGEN),) - TESTDIR = $(TESTDIRCOMMON) - GTEST_ROOT = $(TESTDIR)/googletest/install$(CXXNAMESUFFIX) -else - TESTDIR = -endif -ifneq ($(GTEST_ROOT),) - GTESTLIBDIR = $(GTEST_ROOT)/lib64/ - GTESTLIBS = $(GTESTLIBDIR)/libgtest.a - GTESTINC = -I$(GTEST_ROOT)/include -else - GTESTLIBDIR = - GTESTLIBS = - GTESTINC = -endif -###$(info GTEST_ROOT = $(GTEST_ROOT)) -###$(info LOCALGTEST = $(LOCALGTEST)) -###$(info TESTDIR = $(TESTDIR)) - 
#------------------------------------------------------------------------------- #=== Configure PowerPC-specific compiler flags for C++ and CUDA/HIP @@ -653,14 +622,6 @@ $(BUILDDIR)/.build.$(TAG): @if [ "$(oldtagsb)" != "" ]; then echo "Cannot build for tag=$(TAG) as old builds exist for other tags:"; echo " $(oldtagsb)"; echo "Please run 'make clean' first\nIf 'make clean' is not enough: run 'make clean USEBUILDDIR=1 AVX=$(AVX) FPTYPE=$(FPTYPE)' or 'make cleanall'"; exit 1; fi @touch $(BUILDDIR)/.build.$(TAG) -# Apply special build flags only to CrossSectionKernel_.o (no fast math, see #117 and #516) -# Added edgecase for HIP compilation -ifeq ($(shell $(CXX) --version | grep ^nvc++),) -$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS := $(filter-out -ffast-math,$(CXXFLAGS)) -$(BUILDDIR)/CrossSectionKernels_cpp.o: CXXFLAGS += -fno-fast-math -$(BUILDDIR)/CrossSectionKernels_$(GPUSUFFIX).o: GPUFLAGS += $(XCOMPILERFLAG) -fno-fast-math -endif - # # Apply special build flags only to check_sa_.o (NVTX in timermap.h, #679) $(BUILDDIR)/rwgt_driver_cpp.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) $(BUILDDIR)/rwgt_driver_gpu.o: CXXFLAGS += $(USE_NVTX) $(CUDA_INC) @@ -688,22 +649,21 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): common (src) library -# commonlib : $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so - -# $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../src/*.h ../src/*.cc $(BUILDDIR)/.build.$(TAG) -# $(MAKE) -C ../src $(MAKEDEBUG) -f $(CUDACPP_SRC_MAKEFILE) +commonlib : $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so +$(LIBDIR)/lib$(MG5AMC_COMMONLIB).so: ../src/*.h ../src/*.cc $(BUILDDIR)/.build.$(TAG) + $(MAKE) -C ../src $(MAKEDEBUG) -f $(CUDACPP_SRC_MAKEFILE) #------------------------------------------------------------------------------- #HERE LOOP MAKE OVER P DIRECTORIES AND ADD RWGT_RUNNER_LIBS # Ensure each librwgt.a depends on its directory being built -$(rwgtlib): +$(rwgtlib): $(commonlib) @$(MAKE) -C $(@D) VARIABLE=true # 
Target (and build rules): C++ and CUDA/HIP standalone executables $(cxx_rwgt): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cxx_rwgt): $(BUILDDIR)/rwgt_driver.o $(rwgtlib) - $(CXX) -o $@ $(BUILDDIR)/rwgt_driver.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) $(cxx_proclibs) $(rwgtlib) + $(CXX) -o $@ $(BUILDDIR)/rwgt_driver.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) $(rwgtlib) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) @@ -714,18 +674,11 @@ $(gpu_rwgt): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX)) endif $(gpu_rwgt): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(gpu_rwgt): $(BUILDDIR)/$(BUILDDIR)/rwgt_driver.o $(rwgtlib) - $(GPUCC) -o $@ $(BUILDDIR)/rwgt_driver.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) $(gpu_proclibs) $(rwgtlib) + $(GPUCC) -o $@ $(BUILDDIR)/rwgt_driver.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) $(rwgtlib) endif #------------------------------------------------------------------------------- -# Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%%_fortran.o : %%.f *.inc - @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - $(FC) -I. 
-c $< -o $@ - -#------------------------------------------------------------------------------- - # Target: build all targets in all BACKEND modes (each BACKEND mode in a separate build directory) # Split the bldall target into separate targets to allow parallel 'make -j bldall' builds # (Obsolete hack, no longer needed as there is no INCDIR: add a fbridge.inc dependency to bldall, to ensure it is only copied once for all BACKEND modes) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk index 8b48c30781..4818a0106a 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk @@ -1,7 +1,7 @@ # Copyright (C) 2020-2024 CERN and UCLouvain. # Licensed under the GNU Lesser General Public License (version 3 or later). # Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. -# Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. +# Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. 
#=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) #=== NB: use ':=' to ensure that the value of CUDACPP_MAKEFILE is not modified further down after including make_opts @@ -127,10 +127,10 @@ else override CXXNAME = unknown endif ###$(info CXXNAME=$(CXXNAME)) -override CXXNAMESUFFIX = _$(CXXNAME) +# override CXXNAMESUFFIX = _$(CXXNAME) -# Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) -export CXXNAMESUFFIX +# # Export CXXNAMESUFFIX (so that there is no need to check/define it again in cudacpp_test.mk) +# export CXXNAMESUFFIX # Dependency on test directory # Within the madgraph4gpu git repo: by default use a common gtest installation in /test (optionally use an external or local gtest) @@ -428,15 +428,9 @@ override RUNTIME = ifeq ($(GPUCC),) - cxx_checkmain=$(BUILDDIR)/check_cpp.exe - cxx_fcheckmain=$(BUILDDIR)/fcheck_cpp.exe cxx_rwgtlib=$(BUILDDIR)/librwgt_cpp.so - cxx_testmain=$(BUILDDIR)/runTest_cpp.exe else - gpu_checkmain=$(BUILDDIR)/check_$(GPUSUFFIX).exe - gpu_fcheckmain=$(BUILDDIR)/fcheck_$(GPUSUFFIX).exe gpu_rwgtlib=$(BUILDDIR)/librwgt_$(GPUSUFFIX).so - gpu_testmain=$(BUILDDIR)/runTest_$(GPUSUFFIX).exe endif # Explicitly define the default goal (this is not necessary as it is the first target, which is implicitly the default goal) @@ -444,9 +438,9 @@ endif # First target (default goal) ifeq ($(GPUCC),) -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_rwgtlib) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(cxx_rwgtlib) else -all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_rwgtlib) +all.$(TAG): $(BUILDDIR)/.build.$(TAG) $(gpu_rwgtlib) endif # Target (and build options): debug @@ -557,23 +551,11 @@ endif #------------------------------------------------------------------------------- -# Target (and build rules): Fortran include files -###$(INCDIR)/%%.inc : ../%%.inc -### @if [ ! 
-d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi -### \cp $< $@ - -#------------------------------------------------------------------------------- - -# Target (and build rules): C++ and CUDA/HIP standalone executables -$(cxx_checkmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_checkmain): $(BUILDDIR)/check_sa_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o - $(CXX) -o $@ $(BUILDDIR)/check_sa_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) - # Target (and build rules): C++ rwgt libraries -cxx_rwgtfiles := $(BUILDDIR)/rwgt_runner_cpp.o $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(cxx_objects_exe) +cxx_rwgtfiles := $(BUILDDIR)/rwgt_runner_cpp.o $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(BUILDDIR)/fbridge_cpp.o $(cxx_objects_lib) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(cxx_rwgtlib): LIBFLAGS += $(CXXLIBFLAGSRPATH) -$(cxx_rwgtlib): $(BUILDDIR)/rwgt_runner_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o - $(CXX) -shared -o $@ $(BUILDDIR)/rwgt_runner_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) +$(cxx_rwgtlib): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_rwgtfiles) $(cxx_objects_lib) + $(CXX) -shared -Wl,-Bsymbolic -o $@ $(BUILDDIR)/rwgt_runner_cpp.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) $(BUILDDIR)/fbridge_cpp.o $(cxx_objects_lib) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o 
$(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) @@ -583,7 +565,7 @@ else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 $(gpu_checkmain): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif $(gpu_checkmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o +$(gpu_checkmain): $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(gpu_objects_lib) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(GPUCC) -o $@ $(BUILDDIR)/check_sa_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) ifneq ($(shell $(CXX) --version | grep ^Intel),) $(gpu_rwgtlib): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') $(gpu_rwgtlib): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') @@ -592,53 +574,9 @@ else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 $(gpu_rwgtlib): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif $(gpu_rwgtlib): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -gpu_rwgtfiles := $(BUILDDIR)/rwgt_runner_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o -$(gpu_rwgtlib): $(gpu_rwgtfiles) $(gpu_objects_lib) - $(GPUCC) -shared -o $@ $(BUILDDIR)/rwgt_runner_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o
$(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) -endif - -#------------------------------------------------------------------------------- - -# Generic target and build rules: objects from Fortran compilation -$(BUILDDIR)/%%_fortran.o : %%.f *.inc - @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi - $(FC) -I. -c $< -o $@ - -# Generic target and build rules: objects from Fortran compilation -###$(BUILDDIR)/%%_fortran.o : %%.f *.inc -### @if [ ! -d $(INCDIR) ]; then echo "mkdir -p $(INCDIR)"; mkdir -p $(INCDIR); fi -### @if [ ! -d $(BUILDDIR) ]; then echo "mkdir -p $(BUILDDIR)"; mkdir -p $(BUILDDIR); fi -### $(FC) -I. -I$(INCDIR) -c $< -o $@ - -# Target (and build rules): Fortran standalone executables -###$(BUILDDIR)/fcheck_sa_fortran.o : $(INCDIR)/fbridge.inc - -ifeq ($(UNAME_S),Darwin) -$(cxx_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 -endif -$(cxx_fcheckmain): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_cpp.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) -ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -lstdc++ -else - $(CXX) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(OMPFLAGS) $(BUILDDIR)/fsampler_cpp.o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) -endif - -ifneq ($(GPUCC),) -ifneq ($(shell $(CXX) --version | grep ^Intel),) -$(gpu_fcheckmain): LIBFLAGS += -lintlc # compile with icpx and link with GPUCC (undefined reference to `_intel_fast_memcpy') -$(gpu_fcheckmain): LIBFLAGS += -lsvml # compile with icpx and link with GPUCC (undefined reference to `__svml_cos4_l9') -endif -ifeq 
($(UNAME_S),Darwin) -$(gpu_fcheckmain): LIBFLAGS += -L$(shell dirname $(shell $(FC) --print-file-name libgfortran.dylib)) # add path to libgfortran on Mac #375 -endif -$(gpu_fcheckmain): LIBFLAGS += $(GPULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(gpu_fcheckmain): $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_GPULIB).so $(gpu_objects_exe) -ifneq ($(findstring hipcc,$(GPUCC)),) # link fortran/c++/hip using $FC when hipcc is used #802 - $(FC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -lstdc++ -L$(shell dirname $(shell $(GPUCC) -print-prog-name=clang))/../../lib -lamdhip64 -else - $(GPUCC) -o $@ $(BUILDDIR)/fcheck_sa_fortran.o $(BUILDDIR)/fsampler_$(GPUSUFFIX).o $(LIBFLAGS) -lgfortran -L$(LIBDIR) -l$(MG5AMC_GPULIB) $(gpu_objects_exe) -endif +gpu_rwgtfiles := $(BUILDDIR)/rwgt_runner_$(GPUSUFFIX).o $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_objects_lib) $(gpu_objects_exe) $(BUILDDIR)/fbridge_$(GPUSUFFIX).o $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o +$(gpu_rwgtlib): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(gpu_rwgtfiles) $(gpu_objects_lib) + $(GPUCC) -shared -Wl,-Bsymbolic -o $@ $(BUILDDIR)/rwgt_runner_$(GPUSUFFIX).o $(LIBFLAGS) -L$(LIBDIR) $(BUILDDIR)/fbridge_$(GPUSUFFIX).o $(gpu_objects_exe) $(gpu_objects_lib) $(BUILDDIR)/CurandRandomNumberKernel_$(GPUSUFFIX).o $(BUILDDIR)/HiprandRandomNumberKernel_$(GPUSUFFIX).o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc index 8b3f302975..59266fd226 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc +++ 
b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: S. Roiser (Oct 2021) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Roiser, J. Teig, A. Valassi (2021-2024) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2021-2024) for the MG5aMC CUDACPP plugin. #include "Bridge.h" #include "CPPProcess.h" diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h new file mode 100644 index 0000000000..8272386d2c --- /dev/null +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h @@ -0,0 +1,52 @@ +// Copyright (C) 2020-2024 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: Z. Wettersten (Oct 2024) for the MG5aMC CUDACPP plugin. 
+ +#include "Bridge.h" +#include "CPPProcess.h" +#include "GpuRuntime.h" + +#ifndef _FBRIDGE_H_ +#define _FBRIDGE_H_ + +extern "C" +{ +#ifdef MGONGPUCPP_GPUIMPL + using namespace mg5amcGpu; +#else + using namespace mg5amcCpu; +#endif + + using FORTRANFPTYPE = double; + + void fbridgecreate_( CppObjectInFortran** ppbridge, const int* pnevtF, const int* pnparF, const int* pnp4F ); + + void fbridgedelete_( CppObjectInFortran** ppbridge ); + + void fbridgesequence_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + const unsigned int* channelIds, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol, + const bool* pgoodHelOnly ); + + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + const FORTRANFPTYPE* momenta, + const FORTRANFPTYPE* gs, + const FORTRANFPTYPE* rndhel, + const FORTRANFPTYPE* rndcol, + FORTRANFPTYPE* mes, + int* selhel, + int* selcol, + const bool* pgoodHelOnly ); + + void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, + unsigned int* pngoodhel, + unsigned int* pntothel ); + +} +#endif // _FBRIDGE_H_ \ No newline at end of file diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index df4c514d29..c0b9aa04d1 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -111,7 +111,7 @@ class PLUGIN_ProcessExporter(PLUGIN_export_cpp.ProcessExporterGPU): s+'gpu/RandomNumberKernels.h', s+'gpu/CommonRandomNumberKernel.cc', s+'gpu/CurandRandomNumberKernel.cc', s+'gpu/HiprandRandomNumberKernel.cc', s+'gpu/Bridge.h', s+'gpu/BridgeKernels.cc', s+'gpu/BridgeKernels.h', - s+'gpu/fbridge.cc', s+'gpu/fbridge.inc', s+'gpu/fsampler.cc', s+'gpu/fsampler.inc', + s+'gpu/fbridge.cc', s+'gpu/fbridge.h', s+'gpu/fbridge.inc', s+'gpu/fsampler.cc', s+'gpu/fsampler.inc', s+'gpu/MadgraphTest.h', 
s+'gpu/runTest.cc', s+'gpu/testmisc.cc', s+'gpu/testxxx_cc_ref.txt', s+'gpu/valgrind.h', s+'gpu/perf.py', s+'gpu/profile.sh', @@ -134,7 +134,7 @@ class PLUGIN_ProcessExporter(PLUGIN_export_cpp.ProcessExporterGPU): 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', - 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', + 'fbridge.cc', 'fbridge.h', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', # this is generated from a template in Subprocesses but we still link it in P1 diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py index a0585ec125..045025564a 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py @@ -238,8 +238,13 @@ def export_driver(self): ff.write(template % replace_dict) ff.close() + def link_makefile(self): + """Link the makefile for the REX reweighting""" + files.ln(pjoin(self.dir_path, 'SubProcesses', 'cudacpp_driver.mk'), starting_dir=pjoin(self.dir_path, 'SubProcesses'), name='makefile') + def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): self.export_driver() + self.link_makefile() return super().finalize(matrix_element, cmdhistory, MG5options, outputflag) class TREX_ReweightInterface(rwgt_interface.ReweightInterface): @@ -349,7 +354,6 @@ def compile(self): nb_core = self.mother.options['nb_core'] if self.mother.options['run_mode'] !=0 else 1 else: nb_core = 1 - files.cp(pjoin(pdir, 'cudacpp_driver.mk'),pjoin(pdir, 'makefile')) misc.compile(cwd=pdir, nb_core=nb_core,mode='cpp') return diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index f7ad7e74a4..079ba32b59 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -15,7 +15,7 @@ 
#define _LIBCOMP_ #endif #include "rwgt_instance.h" -#include "fbridge.cc" +#include "fbridge.h" // ZW: SET UP NAMESPACE namespace %(process_namespace)s{ @@ -111,4 +111,4 @@ namespace %(process_namespace)s{ return eventSetConstr( process ); } -} \ No newline at end of file +} From 63bbbfc64d0f698e24c1591926ea7a56e4ea6456 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Wed, 6 Nov 2024 15:42:51 +0100 Subject: [PATCH 72/76] fixed processes with multiple non-interfering cross sections --- .../CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py | 5 +++-- tools/REX/rwgt_runner.cc | 16 ++++++++++++++-- tools/REX/teawREX.cc | 16 ++++++++++++---- 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py index 045025564a..02206248a7 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py @@ -656,8 +656,9 @@ def do_quit(self, line): for line in self.banner['init'].split('\n'): split = line.split() if len(split) == 4: - cross, error = float(split[0]), float(split[1]) - + cross += float(split[0]) + error += float(split[1])**2 + error = error**0.5 if not self.multicore == 'create': # No print of results for the multicore mode for the one printed on screen if 'orig' not in self.all_cross_section: diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index 079ba32b59..a5b4043d1e 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -21,6 +21,16 @@ namespace %(process_namespace)s{ //namespace dummy{ + std::vector> getInitPrts(){ + static std::vector> initPrts = {%(init_prt_ids)s}; + return initPrts; + } + + std::vector> getFinPrts(){ + static std::vector> finPrts = {%(fin_prt_ids)s}; + return finPrts; + } + std::shared_ptr> amp( int& nEvt, int& nPar, int& nMom, std::vector& momenta, std::vector& alphaS, std::vector& rndHel, std::vector& rndCol, std::vector& selHel, 
std::vector& selCol, unsigned int& chanId, bool& goodHel ){ CppObjectInFortran *bridgeInst; auto evalScatAmps = std::make_shared>( nEvt ); @@ -45,8 +55,8 @@ namespace %(process_namespace)s{ } std::shared_ptr> procSort( std::string_view status, std::vector arguments, size_t index ){ - std::vector> initPrts = {%(init_prt_ids)s}; - std::vector> finPrts = {%(fin_prt_ids)s}; + std::vector> initPrts = getInitPrts(); + std::vector> finPrts = getFinPrts(); std::shared_ptr> refOrder; if( index == REX::npos ){ if( status == "-1" ){ @@ -81,11 +91,13 @@ namespace %(process_namespace)s{ bool checkProc( REX::event& process, std::vector& relStats ){ size_t no_evts = %(no_events)s; + auto finPrts = getFinPrts(); for( size_t k = 0 ; k < no_evts ; ++k ){ REX::statSort locSort = [ind = k](std::string_view status, std::vector arguments){ return procSort( status, arguments, ind ); }; auto order = process.getProcOrder( locSort ); + if( order.at("1").size() != finPrts[k].size() ){ continue; } for( size_t j = 0 ; j < relStats.size() ; ++j ){ auto currPts = order.at( relStats[j] ); if( std::find(currPts.begin(), currPts.end(), REX::npos) != currPts.end() ){ break; } diff --git a/tools/REX/teawREX.cc b/tools/REX/teawREX.cc index 99f6d5c59a..d313938748 100644 --- a/tools/REX/teawREX.cc +++ b/tools/REX/teawREX.cc @@ -507,11 +507,14 @@ namespace REX::teaw if( this->ampNorm != 0.0 ){ return; } auto xSecLines = this->lheFile->getInit()->getLines(); if( xSecLines.size() > 1 ){ - std::cout << "\n\033[1;33mWarning: Multiple cross-section lines found in LHE file.\nteawREX only supports single (inclusive) process reweighting.\nWill proceed assuming all events belong to first process type.\033[0m\n"; + std::cout << "\n\033[1;33mWarning: Multiple cross-section lines found in LHE file.\nAssuming total cross section given by sum of all cross sections.\033[0m\n"; } if( xSecLines.size() == 0 ) throw std::runtime_error( "No cross-section information found in LHE file." 
); - auto xSec = std::stod(std::string(xSecLines[0]->xsecup)); + double xSec = 0.0; + for( size_t k = 0 ; k < xSecLines.size() ; ++k ){ + xSec += std::stod(std::string(xSecLines[k]->xsecup)); + } double div = 0.0; bool sameWeight = true; for( size_t k = 1 ; k < this->flatWgts->size() - 1 ; k += size_t(flatWgts->size()/21) ){ @@ -658,8 +661,13 @@ namespace REX::teaw double invN = 1. / double(reWgts->at(0)->size()); double sqrtInvN = std::sqrt( invN ); auto xSecLines = this->lheFile->getInit()->getLines(); - double xSec = std::stod(std::string(xSecLines[0]->xsecup)); - double xErr = std::stod(std::string(xSecLines[0]->xerrup)); + double xSec = 0.0; + double xErr = 0.0; + for( size_t k = 0 ; k < xSecLines.size() ; ++k ){ + xSec += std::stod(std::string(xSecLines[k]->xsecup)); + xErr += std::pow(std::stod(std::string(xSecLines[k]->xerrup)),2); + } + xErr = std::sqrt( xErr ); for( size_t k = 0 ; k < reWgts->size() ; ++k ){ double xSecCurr = normXSecs->at(k); auto locWgts = reWgts->at(k); From d9fefe62301fcccad3406660b3ee555e0b5ecad0 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Wed, 6 Nov 2024 16:42:05 +0100 Subject: [PATCH 73/76] uncommented debug statements but set default debug flags to false and added some additional compile time debug flags --- .../PLUGIN/CUDACPP_SA_OUTPUT/__init__.py | 2 +- .../iolibs/template_files/gpu/Bridge.h | 31 +++++++++++++------ .../iolibs/template_files/gpu/GpuRuntime.h | 8 ++--- .../gpu/MatrixElementKernels.cc | 21 ++++++++----- .../template_files/gpu/MatrixElementKernels.h | 2 +- .../template_files/gpu/cudacpp_config.mk | 5 +-- .../template_files/gpu/cudacpp_rex_src.mk | 2 +- .../template_files/gpu/cudacpp_runner.mk | 1 + .../iolibs/template_files/gpu/fbridge.cc | 2 +- .../iolibs/template_files/gpu/process_cc.inc | 5 +-- .../gpu/process_function_definitions.inc | 2 +- .../CUDACPP_SA_OUTPUT/model_handling.py | 2 +- tools/REX/rwgt_runner.cc | 4 +-- 13 files changed, 53 insertions(+), 34 deletions(-) diff --git 
a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py index d23f568f21..3c383aca1f 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py @@ -1,7 +1,7 @@ # Copyright (C) 2020-2024 CERN and UCLouvain. # Licensed under the GNU Lesser General Public License (version 3 or later). # Created by: O. Mattelaer (Sep 2021) for the MG5aMC CUDACPP plugin. -# Further modified by: O. Mattelaer, A. Valassi (2021-2024) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, A. Valassi, Z. Wettersten (2021-2024) for the MG5aMC CUDACPP plugin. # AV - Rename the plugin as CUDACPP_OUTPUT (even if the madgraph4gpu directory is still called CUDACPP_SA_OUTPUT) # This can be used in mg5amcnlo in one of two ways: diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h index eba523e7dc..334af23a8e 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h @@ -1,7 +1,7 @@ // Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: S. Roiser (Nov 2021) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Roiser, J. Teig, A. Valassi (2021-2024) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2021-2024) for the MG5aMC CUDACPP plugin. #ifndef BRIDGE_H #define BRIDGE_H 1 @@ -255,19 +255,22 @@ namespace mg5amcCpu throw std::logic_error( "Bridge constructor: FIXME! cannot choose gputhreads" ); // this should never happen! m_gpublocks = m_nevt / m_gputhreads; } - //std::cout << "WARNING! 
Instantiate device Bridge (nevt=" << m_nevt << ", gpublocks=" << m_gpublocks << ", gputhreads=" << m_gputhreads - // << ", gpublocks*gputhreads=" << m_gpublocks * m_gputhreads << ")" << std::endl; +#ifdef MGONGPU_VERBOSE_BRIDGE + std::cout << "WARNING! Instantiate device Bridge (nevt=" << m_nevt << ", gpublocks=" << m_gpublocks << ", gputhreads=" << m_gputhreads + << ", gpublocks*gputhreads=" << m_gpublocks * m_gputhreads << ")" << std::endl; +#endif m_pmek.reset( new MatrixElementKernelDevice( m_devMomentaC, m_devGs, m_devRndHel, m_devRndCol, m_devChannelIds, m_devMEs, m_devSelHel, m_devSelCol, m_gpublocks, m_gputhreads ) ); #else - //std::cout << "WARNING! Instantiate host Bridge (nevt=" << m_nevt << ")" << std::endl; +#ifdef MGONGPU_VERBOSE_BRIDGE + std::cout << "WARNING! Instantiate host Bridge (nevt=" << m_nevt << ")" << std::endl; +#endif m_pmek.reset( new MatrixElementKernelHost( m_hstMomentaC, m_hstGs, m_hstRndHel, m_hstRndCol, m_hstChannelIds, m_hstMEs, m_hstSelHel, m_hstSelCol, m_nevt ) ); #endif // MGONGPUCPP_GPUIMPL // Create a process object, read param card and set parameters // FIXME: the process instance can happily go out of scope because it is only needed to read parameters? // FIXME: the CPPProcess should really be a singleton? what if fbridgecreate is called from several Fortran threads? 
CPPProcess process( /*verbose=*/false ); - std::string paramCard = "../../Cards/param_card.dat"; - std::string paramCardTrex = "../Cards/param_card.dat"; + std::string paramCard = "../Cards/param_card.dat"; // ZW: change default param_card.dat location to one dir down /* #ifdef __HIPCC__ if( !std::experimental::filesystem::exists( paramCard ) ) paramCard = "../" + paramCard; @@ -279,8 +282,12 @@ namespace mg5amcCpu //if( !( stat( paramCard.c_str(), &dummyBuffer ) == 0 ) ) paramCard = "../" + paramCard; // auto fileExists = []( std::string& fileName ) { struct stat buffer; return stat( fileName.c_str(), &buffer ) == 0; }; - if( fileExists( paramCardTrex ) ) paramCard = paramCardTrex; // ZW: override param_card.dat to be one dir down since trex runs from the SubProcesses dir directory - if( !fileExists( paramCard ) ) paramCard = "../" + paramCard; // bypass std::filesystem #803 + size_t paramCardCheck = 2; // ZW: check for paramCard up to 2 directories up + for( size_t k = 0 ; k < paramCardCheck ; ++k ) + { + if( fileExists( paramCard ) ) break; // bypass std::filesystem #803 + paramCard = "../" + paramCard; + } process.initProc( paramCard ); } @@ -349,7 +356,9 @@ namespace mg5amcCpu if( goodHelOnly ) return; m_pmek->computeMatrixElements( useChannelIds ); copyHostFromDevice( m_hstMEs, m_devMEs ); - //flagAbnormalMEs( m_hstMEs.data(), m_nevt ); +#ifdef MGONGPU_VERBOSE_BRIDGE + flagAbnormalMEs( m_hstMEs.data(), m_nevt ); +#endif copyHostFromDevice( m_hstSelHel, m_devSelHel ); copyHostFromDevice( m_hstSelCol, m_devSelCol ); if constexpr( std::is_same_v ) @@ -402,7 +411,9 @@ namespace mg5amcCpu } if( goodHelOnly ) return; m_pmek->computeMatrixElements( useChannelIds ); - //flagAbnormalMEs( m_hstMEs.data(), m_nevt ); +#ifdef MGONGPU_VERBOSE_BRIDGE + flagAbnormalMEs( m_hstMEs.data(), m_nevt ); +#endif if constexpr( std::is_same_v ) { memcpy( mes, m_hstMEs.data(), m_hstMEs.bytes() ); diff --git 
a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuRuntime.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuRuntime.h index 862c2c963f..6663045b3d 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuRuntime.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuRuntime.h @@ -1,7 +1,7 @@ // Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: J. Teig (Jun 2023, based on earlier work by S. Roiser) for the MG5aMC CUDACPP plugin. -// Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. +// Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. #ifndef MG5AMC_GPURUNTIME_H #define MG5AMC_GPURUNTIME_H 1 @@ -38,7 +38,7 @@ namespace mg5amcGpu // *** FIXME! This will all need to be designed differently when going to multi-GPU nodes! *** struct GpuRuntime final { - GpuRuntime( const bool debug = false ) + GpuRuntime( const bool debug = false ) // ZW: default debug to false : m_debug( debug ) { setUp( m_debug ); } ~GpuRuntime() { tearDown( m_debug ); } GpuRuntime( const GpuRuntime& ) = delete; @@ -50,7 +50,7 @@ namespace mg5amcGpu // Set up CUDA application // ** NB: strictly speaking this is not needed when using the CUDA runtime API ** // Calling cudaSetDevice on startup is useful to properly book-keep the time spent in CUDA initialization - static void setUp( const bool debug = false ) + static void setUp( const bool debug = false ) // ZW: default debug to false { // ** NB: it is useful to call cudaSetDevice, or cudaFree, to properly book-keep the time spent in CUDA initialization // ** NB: otherwise, the first CUDA operation (eg a cudaMemcpyToSymbol in CPPProcess ctor) appears to take much longer! 
@@ -71,7 +71,7 @@ namespace mg5amcGpu // ** NB: strictly speaking this is not needed when using the CUDA runtime API ** // Calling cudaDeviceReset on shutdown is only needed for checking memory leaks in cuda-memcheck // See https://docs.nvidia.com/cuda/cuda-memcheck/index.html#leak-checking - static void tearDown( const bool debug = false ) + static void tearDown( const bool debug = false ) // ZW: default debug to false { if( debug ) std::cout << "__GpuRuntime: calling GpuDeviceReset()" << std::endl; checkGpu( gpuDeviceReset() ); diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc index 51c2fb6e22..36833a4c79 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Jan 2022) for the MG5aMC CUDACPP plugin. -// Further modified by: J. Teig, A. Valassi (2022-2024) for the MG5aMC CUDACPP plugin. +// Further modified by: J. Teig, A. Valassi, Z. Wettersten (2022-2024) for the MG5aMC CUDACPP plugin. 
#include "MatrixElementKernels.h" @@ -60,7 +60,9 @@ namespace mg5amcCpu #ifdef MGONGPU_CHANNELID_DEBUG MatrixElementKernelBase::dumpNevtProcessedByChannel(); #endif - // MatrixElementKernelBase::dumpSignallingFPEs(); +#ifdef MGONGPU_VERBOSE_FPES + MatrixElementKernelBase::dumpSignallingFPEs(); +#endif } //-------------------------------------------------------------------------- @@ -130,7 +132,9 @@ namespace mg5amcCpu if( std::fetestexcept( FE_OVERFLOW ) ) fpes += " FE_OVERFLOW"; if( std::fetestexcept( FE_UNDERFLOW ) ) fpes += " FE_UNDERFLOW"; //if( std::fetestexcept( FE_INEXACT ) ) fpes += " FE_INEXACT"; // do not print this out: this would almost always signal! - if( fpes != "" ) + if( fpes == "" ) + std::cout << "INFO: No Floating Point Exceptions have been reported" << std::endl; + else std::cerr << "INFO: The following Floating Point Exceptions have been reported:" << fpes << std::endl; } @@ -272,14 +276,15 @@ namespace mg5amcCpu #endif if( verbose ) { - if( tag != "none" ){ - //std::cout << "INFO: The application does not require the host to support any AVX feature" << std::endl; - if( ok && !known ) + if( tag == "none" ) + std::cout << "INFO: The application does not require the host to support any AVX feature" << std::endl; + else if( ok && known ) + std::cout << "INFO: The application is built for " << tag << " and the host supports it" << std::endl; + else if( ok ) std::cout << "WARNING: The application is built for " << tag << " but it is unknown if the host supports it" << std::endl; - else if ( !ok && known ) + else std::cout << "ERROR! 
The application is built for " << tag << " but the host does not support it" << std::endl; } - } return ok; } diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h index 7acff4b308..1a98d5ff80 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/MatrixElementKernels.h @@ -134,7 +134,7 @@ namespace mg5amcCpu // Does this host system support the SIMD used in the matrix element calculation? // [NB: this is private, SIMD vectorization in mg5amc C++ code is currently only used in the ME calculations below MatrixElementKernelHost!] - static bool hostSupportsSIMD( const bool verbose = true ); + static bool hostSupportsSIMD( const bool verbose = false ); // ZW: set verbose to false by default private: diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk index d19491d06f..19cfb7feda 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_config.mk @@ -1,7 +1,7 @@ # Copyright (C) 2020-2024 CERN and UCLouvain. # Licensed under the GNU Lesser General Public License (version 3 or later). # Created by: A. Valassi (Mar 2024) for the MG5aMC CUDACPP plugin. -# Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin. +# Further modified by: A. Valassi, Z. Wettersten (2024) for the MG5aMC CUDACPP plugin. 
#------------------------------------------------------------------------------- @@ -13,7 +13,8 @@ ifeq ($(BACKEND),) override BACKEND = gpucpp endif -# Stop immediately if BACKEND=cuda but nvcc is missing +# ZW: gpucpp backend checks if there is a GPU backend available before going to SIMD +# prioritises CUDA over HIP ifeq ($(BACKEND),gpucpp) ifeq ($(shell which nvcc 2>/dev/null),) ifeq ($(shell which hipcc 2>/dev/null),) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_src.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_src.mk index 9725eae803..de56f08dd7 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_src.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_rex_src.mk @@ -1,7 +1,7 @@ # Copyright (C) 2020-2024 CERN and UCLouvain. # Licensed under the GNU Lesser General Public License (version 3 or later). # Created by: S. Roiser (Feb 2020) for the MG5aMC CUDACPP plugin. -# Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. +# Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. #=== Determine the name of this makefile (https://ftp.gnu.org/old-gnu/Manuals/make-3.80/html_node/make_17.html) #=== NB: assume that the same name (e.g. cudacpp.mk, Makefile...) 
is used in the Subprocess and src directories diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk index 4818a0106a..de08480284 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_runner.mk @@ -552,6 +552,7 @@ endif #------------------------------------------------------------------------------- # Target (and build rules): C++ rwgt libraries +# ZW: the -Bsymbolic flag ensures that function calls will be handled internally by the library, rather than going to global context cxx_rwgtfiles := $(BUILDDIR)/rwgt_runner_cpp.o $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(BUILDDIR)/fbridge_cpp.o $(cxx_objects_lib) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel_cpp.o $(BUILDDIR)/HiprandRandomNumberKernel_cpp.o $(cxx_rwgtlib): LIBFLAGS += $(CXXLIBFLAGSRPATH) $(cxx_rwgtlib): $(LIBDIR)/lib$(MG5AMC_COMMONLIB).so $(cxx_rwgtfiles) $(cxx_objects_lib) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc index 59266fd226..8b3f302975 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.cc @@ -1,7 +1,7 @@ // Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: S. Roiser (Oct 2021) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2021-2024) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Roiser, J. Teig, A. 
Valassi (2021-2024) for the MG5aMC CUDACPP plugin. #include "Bridge.h" #include "CPPProcess.h" diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_cc.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_cc.inc index 459b539a63..26959bf1e5 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_cc.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_cc.inc @@ -62,10 +62,11 @@ fpeEnable() //std::cout << "fpeEnable: FE_INVALID is" << ( ( fpes & FE_INVALID ) ? " " : " NOT " ) << "enabled" << std::endl; //std::cout << "fpeEnable: FE_OVERFLOW is" << ( ( fpes & FE_OVERFLOW ) ? " " : " NOT " ) << "enabled" << std::endl; //std::cout << "fpeEnable: FE_UNDERFLOW is" << ( ( fpes & FE_UNDERFLOW ) ? " " : " NOT " ) << "enabled" << std::endl; - constexpr bool enableFPE = true; // this is hardcoded and no longer controlled by getenv( "CUDACPP_RUNTIME_ENABLEFPE" ) + constexpr bool enableFPE = false; // this is hardcoded and no longer controlled by getenv( "CUDACPP_RUNTIME_ENABLEFPE" ) + // ZW: hardcode enableFPE to false if( enableFPE ) { - //std::cout << "INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW" << std::endl; + std::cout << "INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW" << std::endl; feenableexcept( FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW ); // new strategy #831 (do not enable FE_UNDERFLOW) //fpes = fegetexcept(); //std::cout << "fpeEnable: analyse fegetexcept()=" << fpes << std::endl; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc index 
fe7e6d579c..47322262e4 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/process_function_definitions.inc @@ -117,7 +117,7 @@ namespace mg5amcCpu #else memcpy( cHel, tHel, ncomb * npar * sizeof( short ) ); #endif - //fpeEnable(); // enable SIGFPE traps for Floating Point Exceptions + fpeEnable(); // enable SIGFPE traps for Floating Point Exceptions } //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 84fc3b9f45..78023d2660 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -1,7 +1,7 @@ # Copyright (C) 2020-2024 CERN and UCLouvain. # Licensed under the GNU Lesser General Public License (version 3 or later). # Created by: O. Mattelaer (Sep 2021) for the MG5aMC CUDACPP plugin. -# Further modified by: O. Mattelaer, J. Teig, A. Valassi (2021-2024) for the MG5aMC CUDACPP plugin. +# Further modified by: O. Mattelaer, J. Teig, A. Valassi, Z. Wettersten (2021-2024) for the MG5aMC CUDACPP plugin. 
import os import sys diff --git a/tools/REX/rwgt_runner.cc b/tools/REX/rwgt_runner.cc index a5b4043d1e..3d70a44076 100644 --- a/tools/REX/rwgt_runner.cc +++ b/tools/REX/rwgt_runner.cc @@ -11,8 +11,8 @@ // A class for reweighting matrix elements for %(process_lines)s //-------------------------------------------------------------------------- -#ifndef _LIBCOMP_ -#define _LIBCOMP_ +#ifndef _TREX_ +#define _TREX_ #endif #include "rwgt_instance.h" #include "fbridge.h" From 3f08dc454a7b1c6fbac14b31ab4563e9f77c186f Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Thu, 5 Dec 2024 15:42:51 +0100 Subject: [PATCH 74/76] fixed trex import to work in the dev structure of having the plugin one directory up --- .../CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py index 02206248a7..3284356dcb 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/trex.py @@ -7,10 +7,16 @@ import re import sys import importlib.util + + +# AV - PLUGIN_NAME can be one of PLUGIN/CUDACPP_OUTPUT or MG5aMC_PLUGIN/CUDACPP_OUTPUT +PLUGIN_NAME = __name__.rsplit('.',1)[0] + + SPEC_EXPORTCPP = importlib.util.find_spec('madgraph.iolibs.export_cpp') PLUGIN_export_cpp = importlib.util.module_from_spec(SPEC_EXPORTCPP) SPEC_EXPORTCPP.loader.exec_module(PLUGIN_export_cpp) -sys.modules['PLUGIN.CUDACPP_OUTPUT.PLUGIN_export_cpp'] = PLUGIN_export_cpp # allow 'import PLUGIN.CUDACPP_OUTPUT.PLUGIN_export_cpp' in model_handling.py +sys.modules['%s.PLUGIN_export_cpp'%PLUGIN_NAME] = PLUGIN_export_cpp # allow 'import .PLUGIN_export_cpp' in model_handling.py del SPEC_EXPORTCPP ###print('id(export_cpp)=%s'%id(export_cpp)) ###print('id(PLUGIN_export_cpp)=%s'%id(PLUGIN_export_cpp)) @@ -20,12 +26,16 @@ PLUGINDIR = os.path.dirname( __file__ ) # AV - model_handling 
includes the custom FileWriter, ALOHAWriter, UFOModelConverter, OneProcessExporter and HelasCallWriter, plus additional patches -import PLUGIN.CUDACPP_OUTPUT.model_handling as model_handling -import PLUGIN.CUDACPP_OUTPUT.output as output +#import PLUGIN.CUDACPP_OUTPUT.model_handling as model_handling +__import__('%s.model_handling'%PLUGIN_NAME) +model_handling = sys.modules['%s.model_handling'%PLUGIN_NAME] +#import PLUGIN.CUDACPP_OUTPUT.output as output +__import__('%s.output'%PLUGIN_NAME) +output = sys.modules['%s.output'%PLUGIN_NAME] # AV - create a plugin-specific logger import logging -logger = logging.getLogger('madgraph.PLUGIN.CUDACPP_OUTPUT.output') +logger = logging.getLogger('madgraph.%s.output'%PLUGIN_NAME) from madgraph import MG5DIR #------------------------------------------------------------------------------------ @@ -681,4 +691,4 @@ def do_quit(self, line): with misc.stdchannel_redirected(sys.stdout, os.devnull): for run_id in self.calculator: del self.calculator[run_id] - del self.calculator \ No newline at end of file + del self.calculator From bc74e37e43818fbf0cae111340abb545af3b3ec8 Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Thu, 5 Dec 2024 15:54:50 +0100 Subject: [PATCH 75/76] fixed formatting to match project --- .../madgraph/iolibs/template_files/gpu/Bridge.h | 4 ++-- .../iolibs/template_files/gpu/GpuRuntime.h | 5 ++++- .../madgraph/iolibs/template_files/gpu/fbridge.h | 14 +++++++------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h index 334af23a8e..6f18ec1d67 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/Bridge.h @@ -283,9 +283,9 @@ namespace mg5amcCpu auto fileExists = []( 
std::string& fileName ) { struct stat buffer; return stat( fileName.c_str(), &buffer ) == 0; }; size_t paramCardCheck = 2; // ZW: check for paramCard up to 2 directories up - for( size_t k = 0 ; k < paramCardCheck ; ++k ) + for( size_t k = 0; k < paramCardCheck; ++k ) { - if( fileExists( paramCard ) ) break; // bypass std::filesystem #803 + if( fileExists( paramCard ) ) break; // bypass std::filesystem #803 paramCard = "../" + paramCard; } process.initProc( paramCard ); diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuRuntime.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuRuntime.h index 6663045b3d..1088c9956f 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuRuntime.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/GpuRuntime.h @@ -39,7 +39,10 @@ namespace mg5amcGpu struct GpuRuntime final { GpuRuntime( const bool debug = false ) // ZW: default debug to false - : m_debug( debug ) { setUp( m_debug ); } + : m_debug( debug ) + { + setUp( m_debug ); + } ~GpuRuntime() { tearDown( m_debug ); } GpuRuntime( const GpuRuntime& ) = delete; GpuRuntime( GpuRuntime&& ) = delete; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h index 8272386d2c..c227b4ec46 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h @@ -17,13 +17,13 @@ extern "C" using namespace mg5amcCpu; #endif - using FORTRANFPTYPE = double; + using FORTRANFPTYPE = double; - void fbridgecreate_( CppObjectInFortran** ppbridge, const int* pnevtF, const int* pnparF, const int* pnp4F ); + void fbridgecreate_( CppObjectInFortran** ppbridge, 
const int* pnevtF, const int* pnparF, const int* pnp4F ); - void fbridgedelete_( CppObjectInFortran** ppbridge ); + void fbridgedelete_( CppObjectInFortran** ppbridge ); - void fbridgesequence_( CppObjectInFortran** ppbridge, + void fbridgesequence_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, @@ -34,7 +34,7 @@ extern "C" int* selcol, const bool* pgoodHelOnly ); - void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, + void fbridgesequence_nomultichannel_( CppObjectInFortran** ppbridge, const FORTRANFPTYPE* momenta, const FORTRANFPTYPE* gs, const FORTRANFPTYPE* rndhel, @@ -44,9 +44,9 @@ extern "C" int* selcol, const bool* pgoodHelOnly ); - void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, + void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); } -#endif // _FBRIDGE_H_ \ No newline at end of file +#endif // _FBRIDGE_H_ From 89545ecdd34e88be4d175924ab6cee2fad0ac46e Mon Sep 17 00:00:00 2001 From: Zenny Wettersten Date: Thu, 5 Dec 2024 16:07:05 +0100 Subject: [PATCH 76/76] additional formatting issue --- .../madgraph/iolibs/template_files/gpu/fbridge.h | 1 - 1 file changed, 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h index c227b4ec46..fc83560b18 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/fbridge.h @@ -47,6 +47,5 @@ extern "C" void fbridgegetngoodhel_( CppObjectInFortran** ppbridge, unsigned int* pngoodhel, unsigned int* pntothel ); - } #endif // _FBRIDGE_H_